From 69e542ad7eac99566f1b57e64193567b9a8943f1 Mon Sep 17 00:00:00 2001 From: sammychoco Date: Mon, 30 May 2022 10:18:08 +1000 Subject: [PATCH 01/26] create cummax function --- docs/api/dt/cummax.rst | 88 ++++++++++++++++++++++++++ docs/api/fexpr.rst | 4 ++ docs/api/fexpr/cummax.rst | 7 +++ docs/api/index-api.rst | 3 + docs/releases/v1.1.0.rst | 3 + src/core/column/cummax.h | 93 ++++++++++++++++++++++++++++ src/core/documentation.h | 2 + src/core/expr/fexpr.cc | 10 +++ src/core/expr/fexpr.h | 1 + src/core/expr/fexpr_cummax.cc | 113 ++++++++++++++++++++++++++++++++++ src/datatable/__init__.py | 2 + 11 files changed, 326 insertions(+) create mode 100644 docs/api/dt/cummax.rst create mode 100644 docs/api/fexpr/cummax.rst create mode 100644 src/core/column/cummax.h create mode 100644 src/core/expr/fexpr_cummax.cc diff --git a/docs/api/dt/cummax.rst b/docs/api/dt/cummax.rst new file mode 100644 index 0000000000..a6b838326b --- /dev/null +++ b/docs/api/dt/cummax.rst @@ -0,0 +1,88 @@ + +.. xfunction:: datatable.cummax + :src: src/core/expr/fexpr_cummax.cc pyfn_cummax + :tests: tests/dt/test-cummax.py + :cvar: doc_dt_cummax + :signature: cummax(cols) + + .. x-version-added:: 1.1.0 + + For each column from `cols` calculate cumulative max. The max of + the missing values is calculated as zero. In the presence of :func:`by()`, + the cumulative max is computed per group. + + Parameters + ---------- + cols: FExpr + Input data for cumulative max calculation. + + return: FExpr + f-expression that converts input columns into the columns filled + with the respective cumulative max. + + except: TypeError + The exception is raised when one of the columns from `cols` + has a non-numeric type. + + + Examples + -------- + + Create a sample datatable frame:: + + >>> from datatable import dt, f + >>> DT = dt.Frame({"A": [2, None, 5, -1, 0], + ... "B": [None, None, None, None, None], + ... "C": [5.4, 3, 2.2, 4.323, 3], + ... 
"D": ['a', 'a', 'b', 'b', 'b']}) + | A B C D + | int32 void float64 str32 + -- + ----- ---- ------- ----- + 0 | 2 NA 5.4 a + 1 | NA NA 3 a + 2 | 5 NA 2.2 b + 3 | -1 NA 4.323 b + 4 | 0 NA 3 b + [5 rows x 4 columns] + + + Calculate cumulative sum in a single column:: + + >>> DT[:, dt.cummax(f.A)] + | A + | int64 + -- + ----- + 0 | 2 + 1 | 2 + 2 | 7 + 3 | 6 + 4 | 6 + [5 rows x 1 column] + + + Calculate cumulative sums in multiple columns:: + + >>> DT[:, dt.cummax(f[:-1])] + | A B C + | int64 int64 float64 + -- + ----- ----- ------- + 0 | 2 0 5.4 + 1 | 2 0 8.4 + 2 | 7 0 10.6 + 3 | 6 0 14.923 + 4 | 6 0 17.923 + [5 rows x 3 columns] + + + Calculate cumulative sums per group in the presence of :func:`by()`:: + + >>> DT[:, dt.cummax(f[:]), by('D')] + | D A B C + | str32 int64 int64 float64 + -- + ----- ----- ----- ------- + 0 | a 2 0 5.4 + 1 | a 2 0 8.4 + 2 | b 5 0 2.2 + 3 | b 4 0 6.523 + 4 | b 4 0 9.523 + [5 rows x 4 columns] diff --git a/docs/api/fexpr.rst b/docs/api/fexpr.rst index 14bd7674ea..24789b753d 100644 --- a/docs/api/fexpr.rst +++ b/docs/api/fexpr.rst @@ -163,6 +163,9 @@ * - :meth:`.countna()` - Same as :func:`dt.countna()`. + * - :meth:`.cummax()` + - Same as :func:`dt.cummax()`. + * - :meth:`.cumsum()` - Same as :func:`dt.cumsum()`. @@ -289,6 +292,7 @@ .as_type() .count() .countna() + .cummax() .cumsum() .extend() .first() diff --git a/docs/api/fexpr/cummax.rst b/docs/api/fexpr/cummax.rst new file mode 100644 index 0000000000..bb7e377b29 --- /dev/null +++ b/docs/api/fexpr/cummax.rst @@ -0,0 +1,7 @@ + +.. xmethod:: datatable.FExpr.cummax + :src: src/core/expr/fexpr.cc PyFExpr::cummax + :cvar: doc_FExpr_cummax + :signature: cummax() + + Equivalent to :func:`dt.cummax(self)`. 
diff --git a/docs/api/index-api.rst b/docs/api/index-api.rst index 1d471d2d8a..869456b2ba 100644 --- a/docs/api/index-api.rst +++ b/docs/api/index-api.rst @@ -163,6 +163,8 @@ Functions - Count non-missing values per column * - :func:`countna()` - Count the number of NA values per column + * - :func:`cummax()` + - Calculate the cumulative max of values per column * - :func:`cumsum()` - Calculate the cumulative sum of values per column * - :func:`cov()` @@ -234,6 +236,7 @@ Other count()
countna()
cov()
+ cummax()
cumsum()
cut()
dt
diff --git a/docs/releases/v1.1.0.rst b/docs/releases/v1.1.0.rst index 22ee9acef7..28f43bc5e4 100644 --- a/docs/releases/v1.1.0.rst +++ b/docs/releases/v1.1.0.rst @@ -71,6 +71,9 @@ -[new] Added reducer function :func:`dt.prod()` and the corresponding :meth:`.prod()` method to calculate product of values in columns. [#3140] + -[new] Added function :func:`dt.cummax()`, as well as :meth:`.cummax()` method, + to calculate the cumulative max of values per column. [#3279] + -[new] Added function :func:`dt.cumsum()`, as well as :meth:`.cumsum()` method, to calculate the cumulative sum of values per column. [#3279] diff --git a/src/core/column/cummax.h b/src/core/column/cummax.h new file mode 100644 index 0000000000..f81fb2c95d --- /dev/null +++ b/src/core/column/cummax.h @@ -0,0 +1,93 @@ +//------------------------------------------------------------------------------ +// Copyright 2022 H2O.ai +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +// IN THE SOFTWARE. 
+//------------------------------------------------------------------------------ +#ifndef dt_COLUMN_CUMMAX_h +#define dt_COLUMN_CUMMAX_h +#include "column/virtual.h" +#include "parallel/api.h" +#include "stype.h" + +namespace dt { + + +template <typename T> +class Cummax_ColumnImpl : public Virtual_ColumnImpl { + private: + Column col_; + Groupby gby_; + + public: + Cummax_ColumnImpl(Column&& col, const Groupby& gby) + : Virtual_ColumnImpl(col.nrows(), col.stype()), + col_(std::move(col)), + gby_(gby) + { + xassert(col_.can_be_read_as<T>()); + } + + + void materialize(Column& col_out, bool) override { + Column col = Column::new_data_column(col_.nrows(), col_.stype()); + auto data = static_cast<T*>(col.get_data_editable()); + + auto offsets = gby_.offsets_r(); + dt::parallel_for_dynamic( + gby_.size(), + [&](size_t gi) { + size_t i1 = size_t(offsets[gi]); + size_t i2 = size_t(offsets[gi + 1]); + + T val; + bool is_valid = col_.get_element(i1, &val); + data[i1] = is_valid? val : 0; + + for (size_t i = i1 + 1; i < i2; ++i) { + is_valid = col_.get_element(i, &val); + val = is_valid? val : 0; + data[i] = data[i - 1] > val ? 
data[i - 1] : val; + } + + }); + + col_out = std::move(col); + } + + + ColumnImpl* clone() const override { + return new Cummax_ColumnImpl<T>(Column(col_), gby_); + } + + size_t n_children() const noexcept override { + return 1; + } + + const Column& child(size_t i) const override { + xassert(i == 0); (void)i; + return col_; + } + +}; + + +} // namespace dt + + +#endif diff --git a/src/core/documentation.h b/src/core/documentation.h index 3f4560e3ae..b851075509 100644 --- a/src/core/documentation.h +++ b/src/core/documentation.h @@ -30,6 +30,7 @@ extern const char* doc_dt_corr; extern const char* doc_dt_count; extern const char* doc_dt_countna; extern const char* doc_dt_cov; +extern const char* doc_dt_cummax; extern const char* doc_dt_cumsum; extern const char* doc_dt_cut; extern const char* doc_dt_first; @@ -279,6 +280,7 @@ extern const char* doc_FExpr; extern const char* doc_FExpr_as_type; extern const char* doc_FExpr_count; extern const char* doc_FExpr_countna; +extern const char* doc_FExpr_cummax; extern const char* doc_FExpr_cumsum; extern const char* doc_FExpr_extend; extern const char* doc_FExpr_first; diff --git a/src/core/expr/fexpr.cc b/src/core/expr/fexpr.cc index 051f753001..35f4427ac3 100644 --- a/src/core/expr/fexpr.cc +++ b/src/core/expr/fexpr.cc @@ -588,6 +588,16 @@ DECLARE_METHOD(&PyFExpr::cumsum) ->name("cumsum") ->docs(dt::doc_FExpr_cumsum); + +oobj PyFExpr::cummax(const XArgs&) { + auto cummaxFn = oobj::import("datatable", "cummax"); + return cummaxFn.call({this}); +} + +DECLARE_METHOD(&PyFExpr::cummax) + ->name("cummax") + ->docs(dt::doc_FExpr_cummax); + //------------------------------------------------------------------------------ // Class decoration //------------------------------------------------------------------------------ diff --git a/src/core/expr/fexpr.h b/src/core/expr/fexpr.h index 8cc08916f6..6f940a3d3b 100644 --- a/src/core/expr/fexpr.h +++ b/src/core/expr/fexpr.h @@ -182,6 +182,7 @@ class PyFExpr : public py::XObject { py::oobj 
as_type(const py::XArgs&); py::oobj count(const py::XArgs&); py::oobj countna(const py::XArgs&); + py::oobj cummax(const py::XArgs&); py::oobj cumsum(const py::XArgs&); py::oobj extend(const py::XArgs&); py::oobj first(const py::XArgs&); diff --git a/src/core/expr/fexpr_cummax.cc b/src/core/expr/fexpr_cummax.cc new file mode 100644 index 0000000000..fedf780825 --- /dev/null +++ b/src/core/expr/fexpr_cummax.cc @@ -0,0 +1,113 @@ +//------------------------------------------------------------------------------ +// Copyright 2022 H2O.ai +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +// IN THE SOFTWARE. 
+//------------------------------------------------------------------------------ +#include "column/const.h" +#include "column/cummax.h" +#include "column/latent.h" +#include "documentation.h" +#include "expr/fexpr_func.h" +#include "expr/eval_context.h" +#include "expr/workframe.h" +#include "python/xargs.h" +#include "stype.h" + + +namespace dt { +namespace expr { + +class FExpr_cummax : public FExpr_Func { + private: + ptrExpr arg_; + + public: + FExpr_cummax(ptrExpr&& arg) + : arg_(std::move(arg)) {} + + std::string repr() const override{ + std::string out = "cummax("; + out += arg_->repr(); + out += ')'; + return out; + } + + + Workframe evaluate_n(EvalContext& ctx) const override{ + Workframe wf = arg_->evaluate_n(ctx); + Groupby gby = Groupby::single_group(wf.nrows()); + + if (ctx.has_groupby()) { + wf.increase_grouping_mode(Grouping::GtoALL); + gby = ctx.get_groupby(); + } + + for (size_t i = 0; i < wf.ncols(); ++i) { + Column coli = evaluate1(wf.retrieve_column(i), gby); + wf.replace_column(i, std::move(coli)); + } + return wf; + } + + + Column evaluate1(Column&& col, const Groupby& gby) const { + SType stype = col.stype(); + switch (stype) { + case SType::VOID: + case SType::BOOL: + case SType::INT8: + case SType::INT16: + case SType::INT32: + case SType::INT64: return make<int64_t>(std::move(col), SType::INT64, gby); + case SType::FLOAT32: return make<float>(std::move(col), SType::FLOAT32, gby); + case SType::FLOAT64: return make<double>(std::move(col), SType::FLOAT64, gby); + default: throw TypeError() + << "Invalid column of type " << stype << " in " << repr(); + } + } + + + template <typename T> + Column make(Column&& col, SType stype, const Groupby& gby) const { + if (col.stype() == SType::VOID) { + return Column(new ConstInt_ColumnImpl(col.nrows(), 0, stype)); + } else { + col.cast_inplace(stype); + return Column(new Latent_ColumnImpl( + new Cummax_ColumnImpl<T>(std::move(col), gby) + )); + } + } +}; + + +static py::oobj pyfn_cummax(const py::XArgs& args) { + auto cummax = 
args[0].to_oobj(); + return PyFExpr::make(new FExpr_cummax(as_fexpr(cummax))); +} + + +DECLARE_PYFN(&pyfn_cummax) + ->name("cummax") + ->docs(doc_dt_cummax) + ->arg_names({"cummax"}) + ->n_positional_args(1) + ->n_required_args(1); + +}} // dt::expr diff --git a/src/datatable/__init__.py b/src/datatable/__init__.py index 904ec7e0ad..904897cd8c 100644 --- a/src/datatable/__init__.py +++ b/src/datatable/__init__.py @@ -27,6 +27,7 @@ as_type, by, cbind, + cummax, cumsum, cut, fread, @@ -85,6 +86,7 @@ "corr", "count", "cov", + "cummax", "cumsum", "cut", "dt", From d612e3b6f78760fd4d39dadce277672678a0673f Mon Sep 17 00:00:00 2001 From: sammychoco Date: Tue, 31 May 2022 07:30:56 +1000 Subject: [PATCH 02/26] add tests --- docs/api/dt/cummax.rst | 28 +++++------ docs/releases/v1.1.0.rst | 6 +-- tests/dt/test-cummax.py | 102 +++++++++++++++++++++++++++++++++++++++ tests/test-f.py | 6 +++ 4 files changed, 125 insertions(+), 17 deletions(-) create mode 100644 tests/dt/test-cummax.py diff --git a/docs/api/dt/cummax.rst b/docs/api/dt/cummax.rst index a6b838326b..79c2eb13c4 100644 --- a/docs/api/dt/cummax.rst +++ b/docs/api/dt/cummax.rst @@ -46,7 +46,7 @@ [5 rows x 4 columns] - Calculate cumulative sum in a single column:: + Calculate cumulative maximum in a single column:: >>> DT[:, dt.cummax(f.A)] | A @@ -54,35 +54,35 @@ -- + ----- 0 | 2 1 | 2 - 2 | 7 - 3 | 6 - 4 | 6 + 2 | 5 + 3 | 5 + 4 | 5 [5 rows x 1 column] - Calculate cumulative sums in multiple columns:: + Calculate cumulative maximum in multiple columns:: >>> DT[:, dt.cummax(f[:-1])] | A B C | int64 int64 float64 -- + ----- ----- ------- - 0 | 2 0 5.4 - 1 | 2 0 8.4 - 2 | 7 0 10.6 - 3 | 6 0 14.923 - 4 | 6 0 17.923 + 0 | 2 0 5.4 + 1 | 2 0 5.4 + 2 | 5 0 5.4 + 3 | 5 0 5.4 + 4 | 5 0 5.4 [5 rows x 3 columns] - Calculate cumulative sums per group in the presence of :func:`by()`:: + Calculate cumulative maximum per group in the presence of :func:`by()`:: >>> DT[:, dt.cummax(f[:]), by('D')] | D A B C | str32 int64 int64 float64 
-- + ----- ----- ----- ------- 0 | a 2 0 5.4 - 1 | a 2 0 8.4 + 1 | a 2 0 5.4 2 | b 5 0 2.2 - 3 | b 4 0 6.523 - 4 | b 4 0 9.523 + 3 | b 5 0 4.323 + 4 | b 5 0 4.323 [5 rows x 4 columns] diff --git a/docs/releases/v1.1.0.rst b/docs/releases/v1.1.0.rst index 28f43bc5e4..94873b5d91 100644 --- a/docs/releases/v1.1.0.rst +++ b/docs/releases/v1.1.0.rst @@ -71,12 +71,12 @@ -[new] Added reducer function :func:`dt.prod()` and the corresponding :meth:`.prod()` method to calculate product of values in columns. [#3140] - -[new] Added function :func:`dt.cummax()`, as well as :meth:`.cummax()` method, - to calculate the cumulative max of values per column. [#3279] - -[new] Added function :func:`dt.cumsum()`, as well as :meth:`.cumsum()` method, to calculate the cumulative sum of values per column. [#3279] + -[new] Added function :func:`dt.cummax()`, as well as :meth:`.cummax()` method, + to calculate the cumulative max of values per column. [#3279] + -[enh] Added reducer functions :func:`dt.countna()` and :func:`dt.nunique()`. 
[#2999] -[new] Class :class:`dt.FExpr` now has method :meth:`.nunique()`, diff --git a/tests/dt/test-cummax.py b/tests/dt/test-cummax.py new file mode 100644 index 0000000000..13496ee873 --- /dev/null +++ b/tests/dt/test-cummax.py @@ -0,0 +1,102 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +#------------------------------------------------------------------------------- +# Copyright 2022 H2O.ai +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. 
+#------------------------------------------------------------------------------- +import math +import pytest +from datatable import dt, f, cummax, FExpr, by +from tests import assert_equals + + +#------------------------------------------------------------------------------- +# Errors +#------------------------------------------------------------------------------- + +def test_cummax_non_numeric(): + DT = dt.Frame(list('abcde')) + with pytest.raises(TypeError, match = r'Invalid column of type str32 in cummax'): + DT[:, cummax(f[0])] + +def test_cummax_non_numeric_by(): + DT = dt.Frame(list('abcde')) + with pytest.raises(TypeError, match = r'Invalid column of type str32 in cummax'): + DT[:, cummax(f[0]), by(f[0])] + +def test_cummax_no_argument(): + match = r'Function datatable.cummax\(\) requires exactly 1 positional argument, ' \ + 'but none were given' + with pytest.raises(TypeError, match = match): + dt.cummax() + + +#------------------------------------------------------------------------------- +# Normal +#------------------------------------------------------------------------------- + +def test_cummax_str(): + assert str(cummax(f.A)) == "FExpr<cummax(f.A)>" + assert str(cummax(f.A) + 1) == "FExpr<cummax(f.A) + 1>" + assert str(cummax(f.A + f.B)) == "FExpr<cummax(f.A + f.B)>" + assert str(cummax(f.B)) == "FExpr<cummax(f.B)>" + assert str(cummax(f[:2])) == "FExpr<cummax(f[:2])>" + + +def test_cummax_empty_frame(): + DT = dt.Frame() + expr_cummax = cummax(DT) + assert isinstance(expr_cummax, FExpr) + assert_equals(DT[:, f[:]], DT) + + +def test_cummax_void(): + DT = dt.Frame([None, None, None]) + DT_cummax = DT[:, cummax(f[:])] + assert_equals(DT_cummax, dt.Frame([0, 0, 0]/dt.int64)) + + +def test_cummax_trivial(): + DT = dt.Frame([0]/dt.int64) + cummax_fexpr = cummax(f[:]) + DT_cummax = DT[:, cummax_fexpr] + assert isinstance(cummax_fexpr, FExpr) + assert_equals(DT, DT_cummax) + + +def test_cummax_small(): + DT = dt.Frame([range(5), [-1, 1, None, 2, 5.5]]) + DT_cummax = DT[:, cummax(f[:])] + DT_ref = dt.Frame([[0, 1, 2, 3, 
4]/dt.int64, [-1, 1, 1, 2, 5.5]]) + assert_equals(DT_cummax, DT_ref) + + +def test_cummax_groupby(): + DT = dt.Frame([[2, 1, 1, 1, 2], [1.5, -1.5, math.inf, 2, 3]]) + DT_cummax = DT[:, cummax(f[:]), by(f[0])] + DT_ref = dt.Frame([[1, 1, 1, 2, 2], [-1.5, math.inf, math.inf, 1.5, 3]/dt.float64]) + assert_equals(DT_cummax, DT_ref) + + +def test_cummax_grouped_column(): + DT = dt.Frame([2, 1, None, 1, 2]) + DT_cummax = DT[:, cummax(f[0]), by(f[0])] + DT_ref = dt.Frame([[None, 1, 1, 2, 2], [0, 1, 1, 2, 2]/dt.int64]) + assert_equals(DT_cummax, DT_ref) diff --git a/tests/test-f.py b/tests/test-f.py index d9c56696b4..8ca85b5ecc 100644 --- a/tests/test-f.py +++ b/tests/test-f.py @@ -444,3 +444,9 @@ def test_cumsum(): DT = dt.Frame(A = [9, 8, 2, 3, None, None, 3, 0, 5, 5, 8, None, 1]) assert_equals(DT[:, f.A.cumsum()], DT[:, dt.cumsum(f.A)]) + +def test_cummax(): + assert str(dt.cummax(f.A)) == str(f.A.cummax()) + assert str(dt.cummax(f[:])) == str(f[:].cummax()) + DT = dt.Frame(A = [9, 8, 2, 3, None, None, 3, 0, 5, 5, 8, None, 1]) + assert_equals(DT[:, f.A.cummax()], DT[:, dt.cummax(f.A)]) \ No newline at end of file From 59467662657e625c9d17c894ed3fce77e8e258bc Mon Sep 17 00:00:00 2001 From: sammychoco Date: Tue, 31 May 2022 07:33:10 +1000 Subject: [PATCH 03/26] add more details to examples --- docs/api/dt/cummax.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api/dt/cummax.rst b/docs/api/dt/cummax.rst index 79c2eb13c4..74a5f9ffd2 100644 --- a/docs/api/dt/cummax.rst +++ b/docs/api/dt/cummax.rst @@ -7,7 +7,7 @@ .. x-version-added:: 1.1.0 - For each column from `cols` calculate cumulative max. The max of + For each column from `cols` calculate cumulative max. The maximum of the missing values is calculated as zero. In the presence of :func:`by()`, the cumulative max is computed per group. 
From c1e2120d763e0adc11827361d838cd06c6948384 Mon Sep 17 00:00:00 2001 From: sammychoco Date: Tue, 31 May 2022 07:33:24 +1000 Subject: [PATCH 04/26] add more details to examples --- docs/api/dt/cummax.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api/dt/cummax.rst b/docs/api/dt/cummax.rst index 74a5f9ffd2..4bf6e29e69 100644 --- a/docs/api/dt/cummax.rst +++ b/docs/api/dt/cummax.rst @@ -9,7 +9,7 @@ For each column from `cols` calculate cumulative max. The maximum of the missing values is calculated as zero. In the presence of :func:`by()`, - the cumulative max is computed per group. + the cumulative maximum is computed per group. Parameters ---------- From 8c4e8b5df35768a8cbedbc7738fd744ea69fc819 Mon Sep 17 00:00:00 2001 From: sammychoco Date: Tue, 31 May 2022 07:33:45 +1000 Subject: [PATCH 05/26] add more details to examples --- docs/api/dt/cummax.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/api/dt/cummax.rst b/docs/api/dt/cummax.rst index 4bf6e29e69..0e8a2c5305 100644 --- a/docs/api/dt/cummax.rst +++ b/docs/api/dt/cummax.rst @@ -14,11 +14,11 @@ Parameters ---------- cols: FExpr - Input data for cumulative max calculation. + Input data for cumulative maximum calculation. return: FExpr f-expression that converts input columns into the columns filled - with the respective cumulative max. + with the respective cumulative maximum. except: TypeError The exception is raised when one of the columns from `cols` From 2f8541dc422bf7af6c62301f2e7341c9ba5f918d Mon Sep 17 00:00:00 2001 From: sammychoco Date: Tue, 31 May 2022 07:34:06 +1000 Subject: [PATCH 06/26] add more details to examples --- docs/api/dt/cummax.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api/dt/cummax.rst b/docs/api/dt/cummax.rst index 0e8a2c5305..ae28421430 100644 --- a/docs/api/dt/cummax.rst +++ b/docs/api/dt/cummax.rst @@ -7,7 +7,7 @@ .. 
x-version-added:: 1.1.0 - For each column from `cols` calculate cumulative max. The maximum of + For each column from `cols` calculate cumulative maximum. The maximum of the missing values is calculated as zero. In the presence of :func:`by()`, the cumulative maximum is computed per group. From a6bcb9d7d008af57391be668718ba66ff153bf45 Mon Sep 17 00:00:00 2001 From: sammychoco Date: Tue, 31 May 2022 07:35:18 +1000 Subject: [PATCH 07/26] add more details to examples --- docs/api/dt/cummax.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/api/dt/cummax.rst b/docs/api/dt/cummax.rst index ae28421430..14e675ce9e 100644 --- a/docs/api/dt/cummax.rst +++ b/docs/api/dt/cummax.rst @@ -46,7 +46,7 @@ [5 rows x 4 columns] - Calculate cumulative maximum in a single column:: + Calculate the cumulative maximum in a single column:: >>> DT[:, dt.cummax(f.A)] | A @@ -60,7 +60,7 @@ [5 rows x 1 column] - Calculate cumulative maximum in multiple columns:: + Calculate the cumulative maximum in multiple columns:: >>> DT[:, dt.cummax(f[:-1])] | A B C @@ -74,7 +74,7 @@ [5 rows x 3 columns] - Calculate cumulative maximum per group in the presence of :func:`by()`:: + Calculate the cumulative maximum per group in the presence of :func:`by()`:: >>> DT[:, dt.cummax(f[:]), by('D')] | D A B C From 1784cc1952b22c0d5ca6c972f1673c9d441905e6 Mon Sep 17 00:00:00 2001 From: sammychoco Date: Tue, 31 May 2022 09:12:01 +1000 Subject: [PATCH 08/26] add more details to examples --- docs/api/index-api.rst | 2 +- docs/releases/v1.1.0.rst | 2 +- tests/dt/test-cummax.py | 2 ++ tests/test-f.py | 3 ++- 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/api/index-api.rst b/docs/api/index-api.rst index 869456b2ba..4da45341cc 100644 --- a/docs/api/index-api.rst +++ b/docs/api/index-api.rst @@ -164,7 +164,7 @@ Functions * - :func:`countna()` - Count the number of NA values per column * - :func:`cummax()` - - Calculate the cumulative max of values per column + - Calculate 
the cumulative maximum of values per column * - :func:`cumsum()` - Calculate the cumulative sum of values per column * - :func:`cov()` diff --git a/docs/releases/v1.1.0.rst b/docs/releases/v1.1.0.rst index 94873b5d91..913a706303 100644 --- a/docs/releases/v1.1.0.rst +++ b/docs/releases/v1.1.0.rst @@ -75,7 +75,7 @@ to calculate the cumulative sum of values per column. [#3279] -[new] Added function :func:`dt.cummax()`, as well as :meth:`.cummax()` method, - to calculate the cumulative max of values per column. [#3279] + to calculate the cumulative maximum of values per column. [#3279] -[enh] Added reducer functions :func:`dt.countna()` and :func:`dt.nunique()`. [#2999] diff --git a/tests/dt/test-cummax.py b/tests/dt/test-cummax.py index 13496ee873..e091f005b9 100644 --- a/tests/dt/test-cummax.py +++ b/tests/dt/test-cummax.py @@ -100,3 +100,5 @@ def test_cummax_grouped_column(): DT_cummax = DT[:, cummax(f[0]), by(f[0])] DT_ref = dt.Frame([[None, 1, 1, 2, 2], [0, 1, 1, 2, 2]/dt.int64]) assert_equals(DT_cummax, DT_ref) + + diff --git a/tests/test-f.py b/tests/test-f.py index 8ca85b5ecc..9a4edb2381 100644 --- a/tests/test-f.py +++ b/tests/test-f.py @@ -449,4 +449,5 @@ def test_cummax(): assert str(dt.cummax(f.A)) == str(f.A.cummax()) assert str(dt.cummax(f[:])) == str(f[:].cummax()) DT = dt.Frame(A = [9, 8, 2, 3, None, None, 3, 0, 5, 5, 8, None, 1]) - assert_equals(DT[:, f.A.cummax()], DT[:, dt.cummax(f.A)]) \ No newline at end of file + assert_equals(DT[:, f.A.cummax()], DT[:, dt.cummax(f.A)]) + From 7c74c77dfd5843bd7efc16959b60cc7d6c7a1122 Mon Sep 17 00:00:00 2001 From: Samuel Oranyeli Date: Thu, 2 Jun 2022 16:05:46 +1000 Subject: [PATCH 09/26] Update src/core/expr/fexpr_cummax.cc Co-authored-by: oleksiyskononenko <35204136+oleksiyskononenko@users.noreply.github.com> --- src/core/expr/fexpr_cummax.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/expr/fexpr_cummax.cc b/src/core/expr/fexpr_cummax.cc index fedf780825..985c9a763f 100644 --- 
a/src/core/expr/fexpr_cummax.cc +++ b/src/core/expr/fexpr_cummax.cc @@ -86,7 +86,7 @@ class FExpr_cummax : public FExpr_Func { template <typename T> Column make(Column&& col, SType stype, const Groupby& gby) const { if (col.stype() == SType::VOID) { - return Column(new ConstInt_ColumnImpl(col.nrows(), 0, stype)); + return Column(new ConstNa_ColumnImpl(col.nrows())); } else { col.cast_inplace(stype); return Column(new Latent_ColumnImpl( From 444630a8d3b7a4a5080306f914402ddf057eb0dc Mon Sep 17 00:00:00 2001 From: oleksiyskononenko <35204136+oleksiyskononenko@users.noreply.github.com> Date: Wed, 1 Jun 2022 23:03:56 -0700 Subject: [PATCH 10/26] Silence sphinx warning by setting language to 'en' (#3291) In sphinx config set language to `en` to silence the warning: ``` WARNING: Invalid configuration value found: 'language = None'. Update your configuration to a valid langauge code. Falling back to 'en' (English). ``` --- docs/conf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index b093de747d..936ccf0f81 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -23,7 +23,7 @@ # -- Project information ----------------------------------------------------- project = 'datatable' -copyright = '2018-2020, H2O.ai' +copyright = '2018-2022, H2O.ai' author = 'Pasha Stetsenko' try: @@ -83,7 +83,7 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = 'en' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. 
From 1bf58325558d4271121a630fc8b9e05710c63019 Mon Sep 17 00:00:00 2001 From: oleksiyskononenko <35204136+oleksiyskononenko@users.noreply.github.com> Date: Fri, 3 Jun 2022 00:18:38 -0700 Subject: [PATCH 11/26] Add support for void columns for all the row-functions (#3290) WIP for #3284 --- docs/_ext/xfunction.py | 5 ++- docs/releases/v1.1.0.rst | 3 ++ src/core/expr/fnary/fnary.cc | 22 ++++++---- src/core/expr/fnary/fnary.h | 46 ++++++++++++++++----- src/core/expr/fnary/rowall.cc | 19 +++++---- src/core/expr/fnary/rowany.cc | 28 ++++++------- src/core/expr/fnary/rowcount.cc | 8 ++-- src/core/expr/fnary/rowfirstlast.cc | 39 +++++++----------- src/core/expr/fnary/rowmean.cc | 9 ++-- src/core/expr/fnary/rowminmax.cc | 14 ++++++- src/core/expr/fnary/rowsd.cc | 9 ++-- src/core/expr/fnary/rowsum.cc | 10 +++-- tests/ijby/test-rowwise.py | 64 +++++++++++++++++++++++++++-- 13 files changed, 192 insertions(+), 84 deletions(-) diff --git a/docs/_ext/xfunction.py b/docs/_ext/xfunction.py index 712af84e5a..dd600aac94 100644 --- a/docs/_ext/xfunction.py +++ b/docs/_ext/xfunction.py @@ -1173,6 +1173,7 @@ def locate_cxx_function(name, kind, lines): r"\s*\(.*\)\s*" + r"(?:const\s*|noexcept\s*|override\s*)*" + r"\{\s*") + n_signature_lines = 5 # number of lines allowed for the function signature expect_closing = None istart = None ifinish = None @@ -1187,9 +1188,11 @@ def locate_cxx_function(name, kind, lines): if mm: expect_closing = mm.group(1) + "}" else: - mm = re.match(rx_start, line + lines[i+1]) + src = "".join(lines[i:i+n_signature_lines]) + mm = re.match(rx_start, src) if mm: expect_closing = mm.group(1) + "}" + if not istart: raise ValueError("Could not find %s `%s` in " % (kind, name)) if not expect_closing: diff --git a/docs/releases/v1.1.0.rst b/docs/releases/v1.1.0.rst index 913a706303..7350ba4df8 100644 --- a/docs/releases/v1.1.0.rst +++ b/docs/releases/v1.1.0.rst @@ -100,6 +100,9 @@ -[fix] Reducer functions :func:`dt.prod()` and :func:`dt.sum()` can now be applied to 
:attr:`void <dt.Type.void>` columns. [#3281] [#3282] + -[fix] All the row-wise functions now support :attr:`void <dt.Type.void>` + columns. [#3284] + fread ----- diff --git a/src/core/expr/fnary/fnary.cc b/src/core/expr/fnary/fnary.cc index 52f68fa3c2..1174b837df 100644 --- a/src/core/expr/fnary/fnary.cc +++ b/src/core/expr/fnary/fnary.cc @@ -30,8 +30,8 @@ namespace dt { namespace expr { -FExpr_RowFn::FExpr_RowFn(ptrExpr&& args) - : args_(std::move(args)) +FExpr_RowFn::FExpr_RowFn(ptrExpr&& args, bool process_void_cols /* =false */) + : args_(std::move(args)), process_void_cols_(process_void_cols) {} @@ -47,15 +47,22 @@ std::string FExpr_RowFn::repr() const { Workframe FExpr_RowFn::evaluate_n(EvalContext& ctx) const { Workframe inputs = args_->evaluate_n(ctx); Grouping gmode = inputs.get_grouping_mode(); - std::vector<Column> columns; - columns.reserve(inputs.ncols()); - for (size_t i = 0; i < inputs.ncols(); ++i) { - columns.emplace_back(inputs.retrieve_column(i)); + colvec columns; + size_t ncols = inputs.ncols(); + size_t nrows = 1; + columns.reserve(ncols); + for (size_t i = 0; i < ncols; ++i) { + Column col = inputs.retrieve_column(i); + xassert(i == 0 || nrows == col.nrows()); + nrows = col.nrows(); + if (process_void_cols_ || !col.type().is_void()) { + columns.emplace_back(col); + } } Workframe out(ctx); out.add_column( - apply_function(std::move(columns)), + apply_function(std::move(columns), nrows, ncols), "", gmode ); return out; @@ -66,6 +73,7 @@ SType FExpr_RowFn::common_numeric_stype(const colvec& columns) const { SType common_stype = SType::INT32; for (size_t i = 0; i < columns.size(); ++i) { switch (columns[i].stype()) { + case SType::VOID: case SType::BOOL: case SType::INT8: case SType::INT16: diff --git a/src/core/expr/fnary/fnary.h b/src/core/expr/fnary/fnary.h index 85b4aea4fc..aecf12f36d 100644 --- a/src/core/expr/fnary/fnary.h +++ b/src/core/expr/fnary/fnary.h @@ -51,14 +51,23 @@ py::oobj py_rowfn(const py::XArgs& args); class FExpr_RowFn : public FExpr_Func { private: ptrExpr 
args_; + bool process_void_cols_; + size_t : 56; public: - FExpr_RowFn(ptrExpr&& args); + FExpr_RowFn(ptrExpr&& args, bool process_void_cols = false); std::string repr() const override; Workframe evaluate_n(EvalContext& ctx) const override; virtual std::string name() const = 0; - virtual Column apply_function(std::vector&& columns) const = 0; + virtual Column apply_function( + colvec&& columns, // columns to process; if `process_void_cols_` is `False` + // void columns are filtered out + const size_t nrows, // number of rows in the original input frame; needed in the case + // when all the columns are void and filtered out + const size_t ncols // number of columns in the original input frame, + // including the void columns + ) const = 0; SType common_numeric_stype(const colvec&) const; void promote_columns(colvec& columns, SType target_stype) const; @@ -71,7 +80,9 @@ class FExpr_RowAll : public FExpr_RowFn { using FExpr_RowFn::FExpr_RowFn; std::string name() const override; - Column apply_function(std::vector&& columns) const override; + Column apply_function(colvec&& columns, + const size_t nrows, + const size_t ncols) const override; }; @@ -81,7 +92,9 @@ class FExpr_RowAny : public FExpr_RowFn { using FExpr_RowFn::FExpr_RowFn; std::string name() const override; - Column apply_function(std::vector&& columns) const override; + Column apply_function(colvec&& columns, + const size_t nrows, + const size_t ncols) const override; }; @@ -91,7 +104,9 @@ class FExpr_RowCount : public FExpr_RowFn { using FExpr_RowFn::FExpr_RowFn; std::string name() const override; - Column apply_function(std::vector&& columns) const override; + Column apply_function(colvec&& columns, + const size_t nrows, + const size_t ncols) const override; }; @@ -102,7 +117,9 @@ class FExpr_RowFirstLast : public FExpr_RowFn { using FExpr_RowFn::FExpr_RowFn; std::string name() const override; - Column apply_function(std::vector&& columns) const override; + Column apply_function(colvec&& columns, + const 
size_t nrows, + const size_t ncols) const override; }; extern template class FExpr_RowFirstLast; @@ -115,8 +132,11 @@ class FExpr_RowMinMax : public FExpr_RowFn { public: using FExpr_RowFn::FExpr_RowFn; + FExpr_RowMinMax(ptrExpr&& args); std::string name() const override; - Column apply_function(std::vector&& columns) const override; + Column apply_function(colvec&& columns, + const size_t nrows, + const size_t ncols) const override; }; extern template class FExpr_RowMinMax; @@ -131,7 +151,9 @@ class FExpr_RowMean : public FExpr_RowFn { using FExpr_RowFn::FExpr_RowFn; std::string name() const override; - Column apply_function(std::vector&& columns) const override; + Column apply_function(colvec&& columns, + const size_t nrows, + const size_t ncols) const override; }; @@ -141,7 +163,9 @@ class FExpr_RowSd : public FExpr_RowFn { using FExpr_RowFn::FExpr_RowFn; std::string name() const override; - Column apply_function(std::vector&& columns) const override; + Column apply_function(colvec&& columns, + const size_t nrows, + const size_t ncols) const override; }; @@ -151,7 +175,9 @@ class FExpr_RowSum : public FExpr_RowFn { using FExpr_RowFn::FExpr_RowFn; std::string name() const override; - Column apply_function(std::vector&& columns) const override; + Column apply_function(colvec&& columns, + const size_t nrows, + const size_t ncols) const override; }; diff --git a/src/core/expr/fnary/rowall.cc b/src/core/expr/fnary/rowall.cc index 3e19b22268..7cece08163 100644 --- a/src/core/expr/fnary/rowall.cc +++ b/src/core/expr/fnary/rowall.cc @@ -49,17 +49,21 @@ static bool op_rowall(size_t i, int8_t* out, const colvec& columns) { } -Column FExpr_RowAll::apply_function(colvec&& columns) const { - if (columns.empty()) { +Column FExpr_RowAll::apply_function(colvec&& columns, + const size_t nrows, + const size_t ncols) const +{ + // No columns + if (ncols == 0) { return Const_ColumnImpl::make_bool_column(1, true); } - size_t nrows = columns[0].nrows(); + // Some void columns + if 
(columns.size() != ncols) { + return Const_ColumnImpl::make_bool_column(nrows, false); + } + // No void columns for (size_t i = 0; i < columns.size(); ++i) { xassert(columns[i].nrows() == nrows); - // If there is even one void column, the result of `rowall()` is `false` - if (columns[i].type().is_void()) { - return Const_ColumnImpl::make_bool_column(nrows, false); - } if (!columns[i].type().is_boolean()) { throw TypeError() << "Function `rowall` requires a sequence of boolean " "columns, however column " << i << " has type `" @@ -70,6 +74,7 @@ Column FExpr_RowAll::apply_function(colvec&& columns) const { std::move(columns), op_rowall, nrows, SType::BOOL)); } + DECLARE_PYFN(&py_rowfn) ->name("rowall") ->docs(dt::doc_dt_rowall) diff --git a/src/core/expr/fnary/rowany.cc b/src/core/expr/fnary/rowany.cc index 29b0e8c9f1..5401b2497f 100644 --- a/src/core/expr/fnary/rowany.cc +++ b/src/core/expr/fnary/rowany.cc @@ -50,29 +50,25 @@ static bool op_rowany(size_t i, int8_t* out, const colvec& columns) { -Column FExpr_RowAny::apply_function(colvec&& columns) const { - size_t ncols = columns.size(); - size_t nrows = ncols? 
columns[0].nrows() : 1; - colvec columns_; - columns_.reserve(ncols); - - for (size_t i = 0; i < ncols; ++i) { +Column FExpr_RowAny::apply_function(colvec&& columns, + const size_t nrows, + const size_t ncols) const +{ + // No columns or all the columns are void + if (columns.empty()) { + // `ncols == 0` tests that the original input frame had no columns + return Const_ColumnImpl::make_bool_column(nrows, ncols == 0); + } + for (size_t i = 0; i < columns.size(); ++i) { xassert(columns[i].nrows() == nrows); - if (!columns[i].type().is_boolean_or_void()) { + if (!columns[i].type().is_boolean()) { throw TypeError() << "Function `rowany` requires a sequence of boolean " "columns, however column " << i << " has type `" << columns[i].stype() << "`"; } - // Filter out void columns, since they don't affect result of `rowany()` - if (columns[i].type().is_boolean()) { - columns_.push_back(std::move(columns[i])); - } - } - if (columns_.empty()) { - return Const_ColumnImpl::make_bool_column(nrows, columns.empty()); } return Column(new FuncNary_ColumnImpl( - std::move(columns_), op_rowany, nrows, SType::BOOL)); + std::move(columns), op_rowany, nrows, SType::BOOL)); } diff --git a/src/core/expr/fnary/rowcount.cc b/src/core/expr/fnary/rowcount.cc index 23f9a47018..a750a7f69a 100644 --- a/src/core/expr/fnary/rowcount.cc +++ b/src/core/expr/fnary/rowcount.cc @@ -50,11 +50,13 @@ static bool op_rowcount(size_t i, int32_t* out, const colvec& columns) { } -Column FExpr_RowCount::apply_function(colvec&& columns) const { +Column FExpr_RowCount::apply_function(colvec&& columns, + const size_t nrows, + const size_t) const +{ if (columns.empty()) { - return Const_ColumnImpl::make_int_column(1, 0, SType::INT32); + return Const_ColumnImpl::make_int_column(nrows, 0, SType::INT32); } - size_t nrows = columns[0].nrows(); for (size_t i = 0; i < columns.size(); ++i) { xassert(columns[i].nrows() == nrows); columns[i] = unaryop(Op::ISNA, std::move(columns[i])); diff --git 
a/src/core/expr/fnary/rowfirstlast.cc b/src/core/expr/fnary/rowfirstlast.cc index fb33d69968..3a1c9f3b6a 100644 --- a/src/core/expr/fnary/rowfirstlast.cc +++ b/src/core/expr/fnary/rowfirstlast.cc @@ -57,43 +57,34 @@ static inline Column _rowfirstlast(colvec&& columns, SType outtype) { template -Column FExpr_RowFirstLast::apply_function(colvec&& columns) const { - size_t ncols = columns.size(); - size_t nrows = ncols? columns[0].nrows() : 1; - colvec columns_; - columns_.reserve(ncols); - - for (size_t i = 0; i < ncols; ++i) { - // Filter out void columns, since they don't affect the result - if (!columns[i].type().is_void()) { - columns_.push_back(std::move(columns[i])); - } - } - - if (columns_.empty()) { +Column FExpr_RowFirstLast::apply_function(colvec&& columns, + const size_t nrows, + const size_t) const +{ + if (columns.empty()) { return Const_ColumnImpl::make_na_column(nrows); } // Detect common stype SType stype0 = SType::VOID; - for (const auto& col : columns_) { + for (const auto& col : columns) { stype0 = common_stype(stype0, col.stype()); } if (stype0 == SType::INVALID) { throw TypeError() << "Incompatible column types in function `" << name() << "`"; } - promote_columns(columns_, stype0); + promote_columns(columns, stype0); switch (stype0) { - case SType::BOOL: return _rowfirstlast(std::move(columns_), stype0); - case SType::INT8: return _rowfirstlast(std::move(columns_), stype0); - case SType::INT16: return _rowfirstlast(std::move(columns_), stype0); - case SType::INT32: return _rowfirstlast(std::move(columns_), stype0); - case SType::INT64: return _rowfirstlast(std::move(columns_), stype0); - case SType::FLOAT32: return _rowfirstlast(std::move(columns_), stype0); - case SType::FLOAT64: return _rowfirstlast(std::move(columns_), stype0); + case SType::BOOL: return _rowfirstlast(std::move(columns), stype0); + case SType::INT8: return _rowfirstlast(std::move(columns), stype0); + case SType::INT16: return _rowfirstlast(std::move(columns), stype0); + case 
SType::INT32: return _rowfirstlast(std::move(columns), stype0); + case SType::INT64: return _rowfirstlast(std::move(columns), stype0); + case SType::FLOAT32: return _rowfirstlast(std::move(columns), stype0); + case SType::FLOAT64: return _rowfirstlast(std::move(columns), stype0); case SType::STR32: - case SType::STR64: return _rowfirstlast(std::move(columns_), stype0); + case SType::STR64: return _rowfirstlast(std::move(columns), stype0); default: { throw TypeError() << "Function `" << name() << "` doesn't support type `" << stype0 << "`"; } diff --git a/src/core/expr/fnary/rowmean.cc b/src/core/expr/fnary/rowmean.cc index 627348af08..687d378a16 100644 --- a/src/core/expr/fnary/rowmean.cc +++ b/src/core/expr/fnary/rowmean.cc @@ -62,9 +62,12 @@ static inline Column _rowmean(colvec&& columns) { } -Column FExpr_RowMean::apply_function(colvec&& columns) const { +Column FExpr_RowMean::apply_function(colvec&& columns, + const size_t nrows, + const size_t) const +{ if (columns.empty()) { - return Const_ColumnImpl::make_na_column(1); + return Column(new ConstNa_ColumnImpl(nrows, SType::FLOAT64)); } SType res_stype = common_numeric_stype(columns); if (res_stype == SType::INT32 || res_stype == SType::INT64) { @@ -81,6 +84,7 @@ Column FExpr_RowMean::apply_function(colvec&& columns) const { } } + DECLARE_PYFN(&py_rowfn) ->name("rowmean") ->docs(doc_dt_rowmean) @@ -89,5 +93,4 @@ DECLARE_PYFN(&py_rowfn) - }} // namespace dt::expr diff --git a/src/core/expr/fnary/rowminmax.cc b/src/core/expr/fnary/rowminmax.cc index 19f77b5e0c..fbba48ae7f 100644 --- a/src/core/expr/fnary/rowminmax.cc +++ b/src/core/expr/fnary/rowminmax.cc @@ -33,6 +33,12 @@ namespace dt { namespace expr { +template +FExpr_RowMinMax::FExpr_RowMinMax(ptrExpr&& args) + : FExpr_RowFn(std::move(args), ARG) +{} + + template std::string FExpr_RowMinMax::name() const { if (ARG) { @@ -90,9 +96,12 @@ static inline Column _rowminmax(colvec&& columns) { template -Column FExpr_RowMinMax::apply_function(colvec&& columns) const 
{ +Column FExpr_RowMinMax::apply_function(colvec&& columns, + const size_t nrows, + const size_t) const +{ if (columns.empty()) { - return Const_ColumnImpl::make_na_column(1); + return Const_ColumnImpl::make_na_column(nrows); } SType res_stype = common_numeric_stype(columns); promote_columns(columns, res_stype); @@ -108,6 +117,7 @@ Column FExpr_RowMinMax::apply_function(colvec&& columns) const { } } + template class FExpr_RowMinMax; template class FExpr_RowMinMax; template class FExpr_RowMinMax; diff --git a/src/core/expr/fnary/rowsd.cc b/src/core/expr/fnary/rowsd.cc index c8decd1b61..949bc5530b 100644 --- a/src/core/expr/fnary/rowsd.cc +++ b/src/core/expr/fnary/rowsd.cc @@ -66,9 +66,12 @@ static inline Column _rowsd(colvec&& columns) { } -Column FExpr_RowSd::apply_function(colvec&& columns) const { +Column FExpr_RowSd::apply_function(colvec&& columns, + const size_t nrows, + const size_t) const +{ if (columns.empty()) { - return Const_ColumnImpl::make_na_column(1); + return Column(new ConstNa_ColumnImpl(nrows, SType::FLOAT64)); } SType res_stype = common_numeric_stype(columns); if (res_stype == SType::INT32 || res_stype == SType::INT64) { @@ -85,6 +88,7 @@ Column FExpr_RowSd::apply_function(colvec&& columns) const { } } + DECLARE_PYFN(&py_rowfn) ->name("rowsd") ->docs(doc_dt_rowsd) @@ -93,5 +97,4 @@ DECLARE_PYFN(&py_rowfn) - }} // namespace dt::expr diff --git a/src/core/expr/fnary/rowsum.cc b/src/core/expr/fnary/rowsum.cc index 679449fd3d..316c5c3475 100644 --- a/src/core/expr/fnary/rowsum.cc +++ b/src/core/expr/fnary/rowsum.cc @@ -29,7 +29,6 @@ namespace dt { namespace expr { - std::string FExpr_RowSum::name() const { return "rowsum"; } @@ -58,9 +57,12 @@ static inline Column _rowsum(colvec&& columns) { } -Column FExpr_RowSum::apply_function(colvec&& columns) const { +Column FExpr_RowSum::apply_function(colvec&& columns, + const size_t nrows, + const size_t) const +{ if (columns.empty()) { - return Const_ColumnImpl::make_int_column(1, 0, SType::INT32); + return 
Const_ColumnImpl::make_int_column(nrows, 0, SType::INT32); } SType res_stype = common_numeric_stype(columns); promote_columns(columns, res_stype); @@ -76,6 +78,7 @@ Column FExpr_RowSum::apply_function(colvec&& columns) const { } } + DECLARE_PYFN(&py_rowfn) ->name("rowsum") ->docs(doc_dt_rowsum) @@ -84,5 +87,4 @@ DECLARE_PYFN(&py_rowfn) - }} // namespace dt::expr diff --git a/tests/ijby/test-rowwise.py b/tests/ijby/test-rowwise.py index 60dcfae737..e315c835b8 100644 --- a/tests/ijby/test-rowwise.py +++ b/tests/ijby/test-rowwise.py @@ -249,8 +249,6 @@ def test_rowfirstlast_incompatible_types(): - - #------------------------------------------------------------------------------- # rowmax(), rowmin() #------------------------------------------------------------------------------- @@ -267,6 +265,18 @@ def test_rowminmax_int8(): assert_equals(RES, dt.Frame([[4], [1]], stype=dt.int32)) +def test_rowminmax_void_column1(): + DT = dt.Frame([[None]] * 3) + RES = DT[:, [rowmax(f[:]), rowmin(f[:])]] + assert_equals(RES, dt.Frame([[None], [None]])) + + +def test_rowminmax_void_column2(): + DT = dt.Frame([[None], [None], [1.0], [None]]) + RES = DT[:, [rowmax(f[:]), rowmin(f[:])]] + assert_equals(RES, dt.Frame([[1.0], [1.0]])) + + def test_rowminmax_nas(): DT = dt.Frame([[None]] * 3, stype=dt.int64) RES = DT[:, [rowmax(f[:]), rowmin(f[:])]] @@ -309,6 +319,18 @@ def test_rowargminmax_int8(): assert_equals(RES, dt.Frame([[0], [2]], stype=dt.int64)) +def test_rowargminmax_void_column1(): + DT = dt.Frame([[None]] * 3) + RES = DT[:, [rowargmax(f[:]), rowargmin(f[:])]] + assert_equals(RES, dt.Frame([[None], [None]], stype=dt.int64)) + + +def test_rowargminmax_void_column2(): + DT = dt.Frame([[None], [None], [-100], [None], [1.0], [None]]) + RES = DT[:, [rowargmax(f[:]), rowargmin(f[:])]] + assert_equals(RES, dt.Frame([[4], [2]], stype=dt.int64)) + + def test_rowargminmax_nas(): DT = dt.Frame([[None]] * 3, stype=dt.int64) RES = DT[:, [rowargmax(f[:]), rowargmin(f[:])]] @@ -343,6 +365,18 
@@ def test_rowmean_simple(): assert_equals(DT[:, rowmean(f[:])], dt.Frame(range(5), stype=dt.float64)) +def test_rowmean_void_column1(): + DT = dt.Frame([[None]] * 3) + RES = DT[:, rowmean(f[:])] + assert_equals(RES, dt.Frame([None], stype=dt.float64)) + + +def test_rowmean_void_column2(): + DT = dt.Frame([[None], [None], [4], [None], [6], [None]]) + RES = DT[:, rowmean(f[:])] + assert_equals(RES, dt.Frame([5], stype=dt.float64)) + + def test_rowmean_floats(): DT = dt.Frame([(1.5, 6.4, 0.0, None, 7.22), (2.0, -1.1, math.inf, 4.0, 3.2), @@ -361,7 +395,6 @@ def test_rowmean_wrong_types(): - #------------------------------------------------------------------------------- # rowsd() #------------------------------------------------------------------------------- @@ -372,6 +405,18 @@ def test_rowsd_single_column(): assert_equals(RES, dt.Frame([None]*5, type=float)) +def test_rowsd_void_column1(): + DT = dt.Frame([[None]] * 3) + RES = DT[:, rowsd(f[:])] + assert_equals(RES, dt.Frame([None], stype=dt.float64)) + + +def test_rowsd_void_column2(): + DT = dt.Frame([[None], [None], [6], [None], [6], [None]]) + RES = DT[:, rowsd(f[:])] + assert_equals(RES, dt.Frame([0], stype=dt.float64)) + + def test_rowsd_same_columns(): DT = dt.Frame([range(5)] * 10) RES = DT[:, rowsd(f[:])] @@ -397,11 +442,22 @@ def test_rowsd_wrong_types(): - #------------------------------------------------------------------------------- # rowsum() #------------------------------------------------------------------------------- +def test_rowsum_void_column1(): + DT = dt.Frame([[None]] * 3) + RES = DT[:, rowsum(f[:])] + assert_equals(RES, dt.Frame([0], stype=dt.int32)) + + +def test_rowsum_void_column2(): + DT = dt.Frame([[None], [4], [6], [None], [5], [None]]) + RES = DT[:, rowsum(f[:])] + assert_equals(RES, dt.Frame([15], stype=dt.int32)) + + def test_rowsum_bools(): DT = dt.Frame([[True, True, False, False, None, None], [True, False, True, False, True, None], From 
0b6314a7d50424eb984a6f34c8be08732ac98589 Mon Sep 17 00:00:00 2001 From: sammychoco Date: Sat, 4 Jun 2022 10:30:20 +1000 Subject: [PATCH 12/26] change logic when dealing with NAs --- src/core/column/cummax.h | 10 +++++++--- tests/dt/test-cummax.py | 4 ++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/core/column/cummax.h b/src/core/column/cummax.h index f81fb2c95d..ecaeda6daf 100644 --- a/src/core/column/cummax.h +++ b/src/core/column/cummax.h @@ -57,12 +57,16 @@ class Cummax_ColumnImpl : public Virtual_ColumnImpl { T val; bool is_valid = col_.get_element(i1, &val); - data[i1] = is_valid? val : 0; + data[i1] = is_valid? val : GETNA(); for (size_t i = i1 + 1; i < i2; ++i) { is_valid = col_.get_element(i, &val); - val = is_valid? val : 0; - data[i] = data[i - 1] > val ? data[i - 1] : val; + //val = is_valid? val : GETNA(); + if (is_valid) { + data[i] = data[i - 1] > val ? data[i - 1] : val; + } else{ + data[i] = data[i - 1]; + } } }); diff --git a/tests/dt/test-cummax.py b/tests/dt/test-cummax.py index e091f005b9..7e12ff4b22 100644 --- a/tests/dt/test-cummax.py +++ b/tests/dt/test-cummax.py @@ -70,7 +70,7 @@ def test_cummax_empty_frame(): def test_cummax_void(): DT = dt.Frame([None, None, None]) DT_cummax = DT[:, cummax(f[:])] - assert_equals(DT_cummax, dt.Frame([0, 0, 0]/dt.int64)) + assert_equals(DT_cummax, DT) def test_cummax_trivial(): @@ -98,7 +98,7 @@ def test_cummax_groupby(): def test_cummax_grouped_column(): DT = dt.Frame([2, 1, None, 1, 2]) DT_cummax = DT[:, cummax(f[0]), by(f[0])] - DT_ref = dt.Frame([[None, 1, 1, 2, 2], [0, 1, 1, 2, 2]/dt.int64]) + DT_ref = dt.Frame([[None, 1, 1, 2, 2], [None, 1, 1, 2, 2]/dt.int64]) assert_equals(DT_cummax, DT_ref) From e6142e06c7d16df0779e491111c4a700a1aa4c8e Mon Sep 17 00:00:00 2001 From: sammychoco Date: Sat, 4 Jun 2022 10:34:14 +1000 Subject: [PATCH 13/26] update examples --- docs/api/dt/cummax.rst | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git 
a/docs/api/dt/cummax.rst b/docs/api/dt/cummax.rst index 14e675ce9e..6d03478bf0 100644 --- a/docs/api/dt/cummax.rst +++ b/docs/api/dt/cummax.rst @@ -63,26 +63,26 @@ Calculate the cumulative maximum in multiple columns:: >>> DT[:, dt.cummax(f[:-1])] - | A B C - | int64 int64 float64 - -- + ----- ----- ------- - 0 | 2 0 5.4 - 1 | 2 0 5.4 - 2 | 5 0 5.4 - 3 | 5 0 5.4 - 4 | 5 0 5.4 + | A B C + | int64 void float64 + -- + ----- ---- ------- + 0 | 2 NA 5.4 + 1 | 2 NA 5.4 + 2 | 5 NA 5.4 + 3 | 5 NA 5.4 + 4 | 5 NA 5.4 [5 rows x 3 columns] Calculate the cumulative maximum per group in the presence of :func:`by()`:: >>> DT[:, dt.cummax(f[:]), by('D')] - | D A B C - | str32 int64 int64 float64 - -- + ----- ----- ----- ------- - 0 | a 2 0 5.4 - 1 | a 2 0 5.4 - 2 | b 5 0 2.2 - 3 | b 5 0 4.323 - 4 | b 5 0 4.323 + | D A B C + | str32 int64 void float64 + -- + ----- ----- ---- ------- + 0 | a 2 NA 5.4 + 1 | a 2 NA 5.4 + 2 | b 5 NA 2.2 + 3 | b 5 NA 4.323 + 4 | b 5 NA 4.323 [5 rows x 4 columns] From cb1210dcaeef71acd54b52db57d1ce9a87786e35 Mon Sep 17 00:00:00 2001 From: sammychoco Date: Sat, 4 Jun 2022 10:38:05 +1000 Subject: [PATCH 14/26] update examples --- docs/api/dt/cummax.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/api/dt/cummax.rst b/docs/api/dt/cummax.rst index 6d03478bf0..4ac14e2f44 100644 --- a/docs/api/dt/cummax.rst +++ b/docs/api/dt/cummax.rst @@ -7,8 +7,7 @@ .. x-version-added:: 1.1.0 - For each column from `cols` calculate cumulative maximum. The maximum of - the missing values is calculated as zero. In the presence of :func:`by()`, + For each column from `cols` calculate cumulative maximum. In the presence of :func:`by()`, the cumulative maximum is computed per group. Parameters @@ -18,7 +17,7 @@ return: FExpr f-expression that converts input columns into the columns filled - with the respective cumulative maximum. + with the respective cumulative maximums. 
except: TypeError The exception is raised when one of the columns from `cols` From a3b4965293001a5a9df1cc0bed4674d5b734e929 Mon Sep 17 00:00:00 2001 From: sammychoco Date: Sat, 4 Jun 2022 10:38:36 +1000 Subject: [PATCH 15/26] cleanup cummax.h --- src/core/column/cummax.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/core/column/cummax.h b/src/core/column/cummax.h index ecaeda6daf..96234217a7 100644 --- a/src/core/column/cummax.h +++ b/src/core/column/cummax.h @@ -61,7 +61,6 @@ class Cummax_ColumnImpl : public Virtual_ColumnImpl { for (size_t i = i1 + 1; i < i2; ++i) { is_valid = col_.get_element(i, &val); - //val = is_valid? val : GETNA(); if (is_valid) { data[i] = data[i - 1] > val ? data[i - 1] : val; } else{ From 994a205f9b71dce332146e123784f157c501bf8c Mon Sep 17 00:00:00 2001 From: sammychoco Date: Sat, 4 Jun 2022 10:39:01 +1000 Subject: [PATCH 16/26] cleanup cummax.h --- src/core/column/cummax.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/core/column/cummax.h b/src/core/column/cummax.h index 96234217a7..b1a5bfed49 100644 --- a/src/core/column/cummax.h +++ b/src/core/column/cummax.h @@ -60,8 +60,7 @@ class Cummax_ColumnImpl : public Virtual_ColumnImpl { data[i1] = is_valid? val : GETNA(); for (size_t i = i1 + 1; i < i2; ++i) { - is_valid = col_.get_element(i, &val); - if (is_valid) { + if (col_.get_element(i, &val)) { data[i] = data[i - 1] > val ? 
data[i - 1] : val; } else{ data[i] = data[i - 1]; From bf50084b379741e9eb4b61397dc247a657ea0818 Mon Sep 17 00:00:00 2001 From: sammychoco Date: Tue, 7 Jun 2022 19:19:17 +1000 Subject: [PATCH 17/26] update stypes in cummax.cc --- src/core/column/cummax.h | 4 ++++ src/core/expr/fexpr_cummax.cc | 22 +++++++++------------- tests/dt/test-cummax.py | 4 ++-- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/core/column/cummax.h b/src/core/column/cummax.h index b1a5bfed49..bc32b56400 100644 --- a/src/core/column/cummax.h +++ b/src/core/column/cummax.h @@ -61,6 +61,10 @@ class Cummax_ColumnImpl : public Virtual_ColumnImpl { for (size_t i = i1 + 1; i < i2; ++i) { if (col_.get_element(i, &val)) { + // store previous, present + // if previous is valid, and present is not valid then previous + // if previous is invalid, then present + // else compare previous and present to get max data[i] = data[i - 1] > val ? data[i - 1] : val; } else{ data[i] = data[i - 1]; diff --git a/src/core/expr/fexpr_cummax.cc b/src/core/expr/fexpr_cummax.cc index 985c9a763f..c0c376c867 100644 --- a/src/core/expr/fexpr_cummax.cc +++ b/src/core/expr/fexpr_cummax.cc @@ -69,14 +69,14 @@ class FExpr_cummax : public FExpr_Func { Column evaluate1(Column&& col, const Groupby& gby) const { SType stype = col.stype(); switch (stype) { - case SType::VOID: + case SType::VOID: return Column(new ConstNa_ColumnImpl(col.nrows())); case SType::BOOL: - case SType::INT8: - case SType::INT16: - case SType::INT32: - case SType::INT64: return make(std::move(col), SType::INT64, gby); - case SType::FLOAT32: return make(std::move(col), SType::FLOAT32, gby); - case SType::FLOAT64: return make(std::move(col), SType::FLOAT64, gby); + case SType::INT8: return make(std::move(col), gby); + case SType::INT16: return make(std::move(col), gby); + case SType::INT32: return make(std::move(col), gby); + case SType::INT64: return make(std::move(col), gby); + case SType::FLOAT32: return make(std::move(col), gby); + case 
SType::FLOAT64: return make(std::move(col), gby); default: throw TypeError() << "Invalid column of type " << stype << " in " << repr(); } @@ -84,15 +84,11 @@ class FExpr_cummax : public FExpr_Func { template - Column make(Column&& col, SType stype, const Groupby& gby) const { - if (col.stype() == SType::VOID) { - return Column(new ConstNa_ColumnImpl(col.nrows())); - } else { - col.cast_inplace(stype); + Column make(Column&& col, const Groupby& gby) const { return Column(new Latent_ColumnImpl( new Cummax_ColumnImpl(std::move(col), gby) )); - } + } }; diff --git a/tests/dt/test-cummax.py b/tests/dt/test-cummax.py index 7e12ff4b22..de93a118da 100644 --- a/tests/dt/test-cummax.py +++ b/tests/dt/test-cummax.py @@ -84,7 +84,7 @@ def test_cummax_trivial(): def test_cummax_small(): DT = dt.Frame([range(5), [-1, 1, None, 2, 5.5]]) DT_cummax = DT[:, cummax(f[:])] - DT_ref = dt.Frame([[0, 1, 2, 3, 4]/dt.int64, [-1, 1, 1, 2, 5.5]]) + DT_ref = dt.Frame([[0, 1, 2, 3, 4], [-1, 1, 1, 2, 5.5]]) assert_equals(DT_cummax, DT_ref) @@ -98,7 +98,7 @@ def test_cummax_groupby(): def test_cummax_grouped_column(): DT = dt.Frame([2, 1, None, 1, 2]) DT_cummax = DT[:, cummax(f[0]), by(f[0])] - DT_ref = dt.Frame([[None, 1, 1, 2, 2], [None, 1, 1, 2, 2]/dt.int64]) + DT_ref = dt.Frame([[None, 1, 1, 2, 2], [None, 1, 1, 2, 2]]) assert_equals(DT_cummax, DT_ref) From a59e3bec564090242db270c181f34a4707408b4a Mon Sep 17 00:00:00 2001 From: sammychoco Date: Tue, 7 Jun 2022 19:49:41 +1000 Subject: [PATCH 18/26] update logic for cummax --- src/core/column/cummax.h | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/src/core/column/cummax.h b/src/core/column/cummax.h index bc32b56400..740c0a4198 100644 --- a/src/core/column/cummax.h +++ b/src/core/column/cummax.h @@ -58,18 +58,25 @@ class Cummax_ColumnImpl : public Virtual_ColumnImpl { T val; bool is_valid = col_.get_element(i1, &val); data[i1] = is_valid? 
val : GETNA(); + for (size_t i = i1 + 1; i < i2; ++i) { - if (col_.get_element(i, &val)) { - // store previous, present - // if previous is valid, and present is not valid then previous - // if previous is invalid, then present - // else compare previous and present to get max - data[i] = data[i - 1] > val ? data[i - 1] : val; - } else{ - data[i] = data[i - 1]; + bool next_is_valid = col_.get_element(i, &val); + if (!is_valid & next_is_valid){ + data[i] = val; + is_valid = next_is_valid; } - } + else if (!is_valid & !next_is_valid){ + data[i] = GETNA(); + } + else if (is_valid & !next_is_valid){ + data[i] = data[i-1]; + } + else { + data[i] = std::max(data[i - 1], val); + } + } + }); From b213fa154ef7da664f4681cd72f1331dad9b8678 Mon Sep 17 00:00:00 2001 From: Oleksiy Kononenko Date: Wed, 8 Jun 2022 12:46:44 -0700 Subject: [PATCH 19/26] Cosmetics and a bool test --- src/core/column/cummax.h | 2 +- src/core/expr/fexpr_cummax.cc | 19 +++++++++---------- tests/dt/test-cummax.py | 7 +++++++ 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/core/column/cummax.h b/src/core/column/cummax.h index 740c0a4198..9575e2d317 100644 --- a/src/core/column/cummax.h +++ b/src/core/column/cummax.h @@ -75,7 +75,7 @@ class Cummax_ColumnImpl : public Virtual_ColumnImpl { else { data[i] = std::max(data[i - 1], val); } - } + } }); diff --git a/src/core/expr/fexpr_cummax.cc b/src/core/expr/fexpr_cummax.cc index c0c376c867..1cd1bf6f6f 100644 --- a/src/core/expr/fexpr_cummax.cc +++ b/src/core/expr/fexpr_cummax.cc @@ -69,26 +69,25 @@ class FExpr_cummax : public FExpr_Func { Column evaluate1(Column&& col, const Groupby& gby) const { SType stype = col.stype(); switch (stype) { - case SType::VOID: return Column(new ConstNa_ColumnImpl(col.nrows())); + case SType::VOID: return Column(new ConstNa_ColumnImpl(col.nrows())); case SType::BOOL: - case SType::INT8: return make(std::move(col), gby); - case SType::INT16: return make(std::move(col), gby); - case SType::INT32: return 
make(std::move(col), gby); - case SType::INT64: return make(std::move(col), gby); + case SType::INT8: return make(std::move(col), gby); + case SType::INT16: return make(std::move(col), gby); + case SType::INT32: return make(std::move(col), gby); + case SType::INT64: return make(std::move(col), gby); case SType::FLOAT32: return make(std::move(col), gby); case SType::FLOAT64: return make(std::move(col), gby); default: throw TypeError() - << "Invalid column of type " << stype << " in " << repr(); + << "Invalid column of type `" << stype << "` in " << repr(); } } template Column make(Column&& col, const Groupby& gby) const { - return Column(new Latent_ColumnImpl( - new Cummax_ColumnImpl(std::move(col), gby) - )); - + return Column(new Latent_ColumnImpl( + new Cummax_ColumnImpl(std::move(col), gby) + )); } }; diff --git a/tests/dt/test-cummax.py b/tests/dt/test-cummax.py index de93a118da..1792f0d7a4 100644 --- a/tests/dt/test-cummax.py +++ b/tests/dt/test-cummax.py @@ -81,6 +81,13 @@ def test_cummax_trivial(): assert_equals(DT, DT_cummax) +def test_cummax_bool(): + DT = dt.Frame([False, None, True, False, True]) + DT_cummax = DT[:, cummax(f[:])] + print(DT_cummax) + # assert_equals(DT_cummax, DT) + + def test_cummax_small(): DT = dt.Frame([range(5), [-1, 1, None, 2, 5.5]]) DT_cummax = DT[:, cummax(f[:])] From 02dcf40ffc43854a15ed41c44abe72a4a96dcfab Mon Sep 17 00:00:00 2001 From: Oleksiy Kononenko Date: Thu, 9 Jun 2022 14:58:28 -0700 Subject: [PATCH 20/26] Simplify NA logic --- src/core/column/cummax.h | 24 ++++++++---------------- tests/dt/test-cummax.py | 11 +++++------ 2 files changed, 13 insertions(+), 22 deletions(-) diff --git a/src/core/column/cummax.h b/src/core/column/cummax.h index 9575e2d317..f5c8345b3a 100644 --- a/src/core/column/cummax.h +++ b/src/core/column/cummax.h @@ -56,24 +56,16 @@ class Cummax_ColumnImpl : public Virtual_ColumnImpl { size_t i2 = size_t(offsets[gi + 1]); T val; - bool is_valid = col_.get_element(i1, &val); - data[i1] = is_valid? 
val : GETNA(); - + bool res_valid = col_.get_element(i1, &val); + data[i1] = res_valid? val : GETNA(); for (size_t i = i1 + 1; i < i2; ++i) { - bool next_is_valid = col_.get_element(i, &val); - if (!is_valid & next_is_valid){ - data[i] = val; - is_valid = next_is_valid; - } - else if (!is_valid & !next_is_valid){ - data[i] = GETNA(); - } - else if (is_valid & !next_is_valid){ - data[i] = data[i-1]; - } - else { - data[i] = std::max(data[i - 1], val); + bool is_valid = col_.get_element(i, &val); + if (is_valid) { + data[i] = (res_valid && data[i - 1] > val)? data[i - 1] : val; + res_valid = true; + } else { + data[i] = data[i - 1]; } } diff --git a/tests/dt/test-cummax.py b/tests/dt/test-cummax.py index 1792f0d7a4..b6df184872 100644 --- a/tests/dt/test-cummax.py +++ b/tests/dt/test-cummax.py @@ -82,21 +82,20 @@ def test_cummax_trivial(): def test_cummax_bool(): - DT = dt.Frame([False, None, True, False, True]) + DT = dt.Frame([None, False, None, True, False, True]) DT_cummax = DT[:, cummax(f[:])] - print(DT_cummax) - # assert_equals(DT_cummax, DT) + assert_equals(DT_cummax, dt.Frame([None, False, False, True, True, True])) def test_cummax_small(): - DT = dt.Frame([range(5), [-1, 1, None, 2, 5.5]]) + DT = dt.Frame([range(5), [None, -1, None, 5.5, 3]]) DT_cummax = DT[:, cummax(f[:])] - DT_ref = dt.Frame([[0, 1, 2, 3, 4], [-1, 1, 1, 2, 5.5]]) + DT_ref = dt.Frame([[0, 1, 2, 3, 4], [None, -1, -1, 5.5, 5.5]]) assert_equals(DT_cummax, DT_ref) def test_cummax_groupby(): - DT = dt.Frame([[2, 1, 1, 1, 2], [1.5, -1.5, math.inf, 2, 3]]) + DT = dt.Frame([[2, 1, 1, 1, 2], [1.5, -1.5, math.inf, None, 3]]) DT_cummax = DT[:, cummax(f[:]), by(f[0])] DT_ref = dt.Frame([[1, 1, 1, 2, 2], [-1.5, math.inf, math.inf, 1.5, 3]/dt.float64]) assert_equals(DT_cummax, DT_ref) From 0fa7ad1c2d544ce024df995aedc76c1f62c1ff63 Mon Sep 17 00:00:00 2001 From: Oleksiy Kononenko Date: Thu, 9 Jun 2022 20:35:17 -0700 Subject: [PATCH 21/26] Implement templated cummin/cummax --- docs/api/dt/cummin.rst | 87 
+++++++++++ docs/api/fexpr.rst | 4 + docs/api/fexpr/cummin.rst | 7 + docs/api/index-api.rst | 1 + docs/releases/v1.1.0.rst | 5 +- src/core/column/{cummax.h => cumminmax.h} | 18 ++- src/core/documentation.h | 2 + src/core/expr/fexpr.cc | 10 ++ src/core/expr/fexpr.h | 1 + .../{fexpr_cummax.cc => fexpr_cumminmax.cc} | 33 ++++- src/datatable/__init__.py | 2 + tests/dt/test-cummax.py | 110 -------------- tests/dt/test-cumminmax.py | 138 ++++++++++++++++++ tests/dt/test-cumsum.py | 2 +- tests/dt/test-qcut.py | 2 +- 15 files changed, 293 insertions(+), 129 deletions(-) create mode 100644 docs/api/dt/cummin.rst create mode 100644 docs/api/fexpr/cummin.rst rename src/core/column/{cummax.h => cumminmax.h} (84%) rename src/core/expr/{fexpr_cummax.cc => fexpr_cumminmax.cc} (81%) delete mode 100644 tests/dt/test-cummax.py create mode 100644 tests/dt/test-cumminmax.py diff --git a/docs/api/dt/cummin.rst b/docs/api/dt/cummin.rst new file mode 100644 index 0000000000..19c5f05431 --- /dev/null +++ b/docs/api/dt/cummin.rst @@ -0,0 +1,87 @@ + +.. xfunction:: datatable.cummin + :src: src/core/expr/fexpr_cumminmax.cc pyfn_cummin + :tests: tests/dt/test-cumminmax.py + :cvar: doc_dt_cummin + :signature: cummin(cols) + + .. x-version-added:: 1.1.0 + + For each column from `cols` calculate cumulative minimum. In the presence of :func:`by()`, + the cumulative minimum is computed per group. + + Parameters + ---------- + cols: FExpr + Input data for cumulative minimum calculation. + + return: FExpr + f-expression that converts input columns into the columns filled + with the respective cumulative minimums. + + except: TypeError + The exception is raised when one of the columns from `cols` + has a non-numeric type. + + + Examples + -------- + + Create a sample datatable frame:: + + >>> from datatable import dt, f + >>> DT = dt.Frame({"A": [2, None, 5, -1, 0], + ... "B": [None, None, None, None, None], + ... "C": [5.4, 3, 2.2, 4.323, 3], + ... 
"D": ['a', 'a', 'b', 'b', 'b']}) + | A B C D + | int32 void float64 str32 + -- + ----- ---- ------- ----- + 0 | 2 NA 5.4 a + 1 | NA NA 3 a + 2 | 5 NA 2.2 b + 3 | -1 NA 4.323 b + 4 | 0 NA 3 b + [5 rows x 4 columns] + + + Calculate the cumulative maximum in a single column:: + + >>> DT[:, dt.cummax(f.A)] + | A + | int64 + -- + ----- + 0 | 2 + 1 | 2 + 2 | 5 + 3 | 5 + 4 | 5 + [5 rows x 1 column] + + + Calculate the cumulative maximum in multiple columns:: + + >>> DT[:, dt.cummax(f[:-1])] + | A B C + | int64 void float64 + -- + ----- ---- ------- + 0 | 2 NA 5.4 + 1 | 2 NA 5.4 + 2 | 5 NA 5.4 + 3 | 5 NA 5.4 + 4 | 5 NA 5.4 + [5 rows x 3 columns] + + + Calculate the cumulative maximum per group in the presence of :func:`by()`:: + + >>> DT[:, dt.cummax(f[:]), by('D')] + | D A B C + | str32 int64 void float64 + -- + ----- ----- ---- ------- + 0 | a 2 NA 5.4 + 1 | a 2 NA 5.4 + 2 | b 5 NA 2.2 + 3 | b 5 NA 4.323 + 4 | b 5 NA 4.323 + [5 rows x 4 columns] diff --git a/docs/api/fexpr.rst b/docs/api/fexpr.rst index 24789b753d..2237c2b513 100644 --- a/docs/api/fexpr.rst +++ b/docs/api/fexpr.rst @@ -163,6 +163,9 @@ * - :meth:`.countna()` - Same as :func:`dt.countna()`. + * - :meth:`.cummin()` + - Same as :func:`dt.cummin()`. + * - :meth:`.cummax()` - Same as :func:`dt.cummax()`. @@ -292,6 +295,7 @@ .as_type() .count() .countna() + .cummin() .cummax() .cumsum() .extend() diff --git a/docs/api/fexpr/cummin.rst b/docs/api/fexpr/cummin.rst new file mode 100644 index 0000000000..e3a138c3ac --- /dev/null +++ b/docs/api/fexpr/cummin.rst @@ -0,0 +1,7 @@ + +.. xmethod:: datatable.FExpr.cummin + :src: src/core/expr/fexpr.cc PyFExpr::cummin + :cvar: doc_FExpr_cummin + :signature: cummin() + + Equivalent to :func:`dt.cummin(self)`. diff --git a/docs/api/index-api.rst b/docs/api/index-api.rst index 4dd9fb4c83..48bcf3633d 100644 --- a/docs/api/index-api.rst +++ b/docs/api/index-api.rst @@ -236,6 +236,7 @@ Other count()
countna()
cov()
+ cummin()
cummax()
cumsum()
cut()
diff --git a/docs/releases/v1.1.0.rst b/docs/releases/v1.1.0.rst index 338ed90393..3de13c798f 100644 --- a/docs/releases/v1.1.0.rst +++ b/docs/releases/v1.1.0.rst @@ -74,8 +74,9 @@ -[new] Added function :func:`dt.cumsum()`, as well as :meth:`.cumsum()` method, to calculate the cumulative sum of values per column. [#3279] - -[new] Added function :func:`dt.cummax()`, as well as :meth:`.cummax()` method, - to calculate the cumulative maximum of values per column. [#3279] + -[new] Added functions :func:`dt.cummin()` and :func:`dt.cummax()`, as well as + the corresponding :meth:`.cummin()` and :meth:`.cummax()` methods, + to calculate the cumulative minimum and maximum of values per column. [#3279] -[enh] Added reducer functions :func:`dt.countna()` and :func:`dt.nunique()`. [#2999] diff --git a/src/core/column/cummax.h b/src/core/column/cumminmax.h similarity index 84% rename from src/core/column/cummax.h rename to src/core/column/cumminmax.h index f5c8345b3a..ca0ebfb4b6 100644 --- a/src/core/column/cummax.h +++ b/src/core/column/cumminmax.h @@ -19,8 +19,8 @@ // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS // IN THE SOFTWARE. 
//------------------------------------------------------------------------------ -#ifndef dt_COLUMN_CUMMAX_h -#define dt_COLUMN_CUMMAX_h +#ifndef dt_COLUMN_CUMMINMAX_h +#define dt_COLUMN_CUMMINMAX_h #include "column/virtual.h" #include "parallel/api.h" #include "stype.h" @@ -28,14 +28,14 @@ namespace dt { -template -class Cummax_ColumnImpl : public Virtual_ColumnImpl { +template +class CumMinMax_ColumnImpl : public Virtual_ColumnImpl { private: Column col_; Groupby gby_; public: - Cummax_ColumnImpl(Column&& col, const Groupby& gby) + CumMinMax_ColumnImpl(Column&& col, const Groupby& gby) : Virtual_ColumnImpl(col.nrows(), col.stype()), col_(std::move(col)), gby_(gby) @@ -62,7 +62,11 @@ class Cummax_ColumnImpl : public Virtual_ColumnImpl { for (size_t i = i1 + 1; i < i2; ++i) { bool is_valid = col_.get_element(i, &val); if (is_valid) { - data[i] = (res_valid && data[i - 1] > val)? data[i - 1] : val; + if (MIN) { + data[i] = (res_valid && data[i - 1] < val)? data[i - 1] : val; + } else { + data[i] = (res_valid && data[i - 1] > val)? 
data[i - 1] : val; + } res_valid = true; } else { data[i] = data[i - 1]; @@ -77,7 +81,7 @@ class Cummax_ColumnImpl : public Virtual_ColumnImpl { ColumnImpl* clone() const override { - return new Cummax_ColumnImpl(Column(col_), gby_); + return new CumMinMax_ColumnImpl(Column(col_), gby_); } size_t n_children() const noexcept override { diff --git a/src/core/documentation.h b/src/core/documentation.h index b851075509..57b918e6ec 100644 --- a/src/core/documentation.h +++ b/src/core/documentation.h @@ -30,6 +30,7 @@ extern const char* doc_dt_corr; extern const char* doc_dt_count; extern const char* doc_dt_countna; extern const char* doc_dt_cov; +extern const char* doc_dt_cummin; extern const char* doc_dt_cummax; extern const char* doc_dt_cumsum; extern const char* doc_dt_cut; @@ -280,6 +281,7 @@ extern const char* doc_FExpr; extern const char* doc_FExpr_as_type; extern const char* doc_FExpr_count; extern const char* doc_FExpr_countna; +extern const char* doc_FExpr_cummin; extern const char* doc_FExpr_cummax; extern const char* doc_FExpr_cumsum; extern const char* doc_FExpr_extend; diff --git a/src/core/expr/fexpr.cc b/src/core/expr/fexpr.cc index 35f4427ac3..6fc068d5fe 100644 --- a/src/core/expr/fexpr.cc +++ b/src/core/expr/fexpr.cc @@ -589,6 +589,16 @@ DECLARE_METHOD(&PyFExpr::cumsum) ->docs(dt::doc_FExpr_cumsum); +oobj PyFExpr::cummin(const XArgs&) { + auto cumminFn = oobj::import("datatable", "cummin"); + return cumminFn.call({this}); +} + +DECLARE_METHOD(&PyFExpr::cummin) + ->name("cummin") + ->docs(dt::doc_FExpr_cummin); + + oobj PyFExpr::cummax(const XArgs&) { auto cummaxFn = oobj::import("datatable", "cummax"); return cummaxFn.call({this}); diff --git a/src/core/expr/fexpr.h b/src/core/expr/fexpr.h index 6f940a3d3b..80015fb6e0 100644 --- a/src/core/expr/fexpr.h +++ b/src/core/expr/fexpr.h @@ -182,6 +182,7 @@ class PyFExpr : public py::XObject { py::oobj as_type(const py::XArgs&); py::oobj count(const py::XArgs&); py::oobj countna(const py::XArgs&); + py::oobj 
cummin(const py::XArgs&); py::oobj cummax(const py::XArgs&); py::oobj cumsum(const py::XArgs&); py::oobj extend(const py::XArgs&); diff --git a/src/core/expr/fexpr_cummax.cc b/src/core/expr/fexpr_cumminmax.cc similarity index 81% rename from src/core/expr/fexpr_cummax.cc rename to src/core/expr/fexpr_cumminmax.cc index 1cd1bf6f6f..f3b407aedc 100644 --- a/src/core/expr/fexpr_cummax.cc +++ b/src/core/expr/fexpr_cumminmax.cc @@ -20,7 +20,7 @@ // IN THE SOFTWARE. //------------------------------------------------------------------------------ #include "column/const.h" -#include "column/cummax.h" +#include "column/cumminmax.h" #include "column/latent.h" #include "documentation.h" #include "expr/fexpr_func.h" @@ -33,23 +33,25 @@ namespace dt { namespace expr { -class FExpr_cummax : public FExpr_Func { +template +class FExpr_CumMinMax : public FExpr_Func { private: ptrExpr arg_; public: - FExpr_cummax(ptrExpr&& arg) + FExpr_CumMinMax(ptrExpr&& arg) : arg_(std::move(arg)) {} - std::string repr() const override{ - std::string out = "cummax("; + std::string repr() const override { + std::string out = MIN? 
"cummin" : "cummax"; + out += '('; out += arg_->repr(); out += ')'; return out; } - Workframe evaluate_n(EvalContext& ctx) const override{ + Workframe evaluate_n(EvalContext& ctx) const override { Workframe wf = arg_->evaluate_n(ctx); Groupby gby = Groupby::single_group(wf.nrows()); @@ -86,7 +88,7 @@ class FExpr_cummax : public FExpr_Func { template Column make(Column&& col, const Groupby& gby) const { return Column(new Latent_ColumnImpl( - new Cummax_ColumnImpl(std::move(col), gby) + new CumMinMax_ColumnImpl(std::move(col), gby) )); } }; @@ -94,7 +96,13 @@ class FExpr_cummax : public FExpr_Func { static py::oobj pyfn_cummax(const py::XArgs& args) { auto cummax = args[0].to_oobj(); - return PyFExpr::make(new FExpr_cummax(as_fexpr(cummax))); + return PyFExpr::make(new FExpr_CumMinMax(as_fexpr(cummax))); +} + + +static py::oobj pyfn_cummin(const py::XArgs& args) { + auto cummin = args[0].to_oobj(); + return PyFExpr::make(new FExpr_CumMinMax(as_fexpr(cummin))); } @@ -105,4 +113,13 @@ DECLARE_PYFN(&pyfn_cummax) ->n_positional_args(1) ->n_required_args(1); + +DECLARE_PYFN(&pyfn_cummin) + ->name("cummin") + ->docs(doc_dt_cummin) + ->arg_names({"cummin"}) + ->n_positional_args(1) + ->n_required_args(1); + + }} // dt::expr diff --git a/src/datatable/__init__.py b/src/datatable/__init__.py index 904897cd8c..4cb7b03a36 100644 --- a/src/datatable/__init__.py +++ b/src/datatable/__init__.py @@ -27,6 +27,7 @@ as_type, by, cbind, + cummin, cummax, cumsum, cut, @@ -86,6 +87,7 @@ "corr", "count", "cov", + "cummin", "cummax", "cumsum", "cut", diff --git a/tests/dt/test-cummax.py b/tests/dt/test-cummax.py deleted file mode 100644 index b6df184872..0000000000 --- a/tests/dt/test-cummax.py +++ /dev/null @@ -1,110 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -#------------------------------------------------------------------------------- -# Copyright 2022 H2O.ai -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and 
associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. -#------------------------------------------------------------------------------- -import math -import pytest -from datatable import dt, f, cummax, FExpr, by -from tests import assert_equals - - -#------------------------------------------------------------------------------- -# Errors -#------------------------------------------------------------------------------- - -def test_cummax_non_numeric(): - DT = dt.Frame(list('abcde')) - with pytest.raises(TypeError, match = r'Invalid column of type str32 in cummax'): - DT[:, cummax(f[0])] - -def test_cummax_non_numeric_by(): - DT = dt.Frame(list('abcde')) - with pytest.raises(TypeError, match = r'Invalid column of type str32 in cummax'): - DT[:, cummax(f[0]), by(f[0])] - -def test_cummax_no_argument(): - match = r'Function datatable.cummax\(\) requires exactly 1 positional argument, ' \ - 'but none were given' - with pytest.raises(TypeError, match = match): - dt.cummax() - - -#------------------------------------------------------------------------------- -# Normal 
-#------------------------------------------------------------------------------- - -def test_cummax_str(): - assert str(cummax(f.A)) == "FExpr" - assert str(cummax(f.A) + 1) == "FExpr" - assert str(cummax(f.A + f.B)) == "FExpr" - assert str(cummax(f.B)) == "FExpr" - assert str(cummax(f[:2])) == "FExpr" - - -def test_cummax_empty_frame(): - DT = dt.Frame() - expr_cummax = cummax(DT) - assert isinstance(expr_cummax, FExpr) - assert_equals(DT[:, f[:]], DT) - - -def test_cummax_void(): - DT = dt.Frame([None, None, None]) - DT_cummax = DT[:, cummax(f[:])] - assert_equals(DT_cummax, DT) - - -def test_cummax_trivial(): - DT = dt.Frame([0]/dt.int64) - cummax_fexpr = cummax(f[:]) - DT_cummax = DT[:, cummax_fexpr] - assert isinstance(cummax_fexpr, FExpr) - assert_equals(DT, DT_cummax) - - -def test_cummax_bool(): - DT = dt.Frame([None, False, None, True, False, True]) - DT_cummax = DT[:, cummax(f[:])] - assert_equals(DT_cummax, dt.Frame([None, False, False, True, True, True])) - - -def test_cummax_small(): - DT = dt.Frame([range(5), [None, -1, None, 5.5, 3]]) - DT_cummax = DT[:, cummax(f[:])] - DT_ref = dt.Frame([[0, 1, 2, 3, 4], [None, -1, -1, 5.5, 5.5]]) - assert_equals(DT_cummax, DT_ref) - - -def test_cummax_groupby(): - DT = dt.Frame([[2, 1, 1, 1, 2], [1.5, -1.5, math.inf, None, 3]]) - DT_cummax = DT[:, cummax(f[:]), by(f[0])] - DT_ref = dt.Frame([[1, 1, 1, 2, 2], [-1.5, math.inf, math.inf, 1.5, 3]/dt.float64]) - assert_equals(DT_cummax, DT_ref) - - -def test_cummax_grouped_column(): - DT = dt.Frame([2, 1, None, 1, 2]) - DT_cummax = DT[:, cummax(f[0]), by(f[0])] - DT_ref = dt.Frame([[None, 1, 1, 2, 2], [None, 1, 1, 2, 2]]) - assert_equals(DT_cummax, DT_ref) - - diff --git a/tests/dt/test-cumminmax.py b/tests/dt/test-cumminmax.py new file mode 100644 index 0000000000..05830c2071 --- /dev/null +++ b/tests/dt/test-cumminmax.py @@ -0,0 +1,138 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +#------------------------------------------------------------------------------- 
+# Copyright 2022 H2O.ai +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. 
+#------------------------------------------------------------------------------- +import math +import pytest +from datatable import dt, f, cummax, cummin, FExpr, by +from tests import assert_equals + + +#------------------------------------------------------------------------------- +# Errors +#------------------------------------------------------------------------------- + +@pytest.mark.parametrize("mm", [cummin, cummax]) +def test_cumminmax_non_numeric(mm): + DT = dt.Frame(list('abcde')) + with pytest.raises(TypeError, + match = r'Invalid column of type str32 in ' + mm.__name__): + DT[:, mm(f[0])] + + +@pytest.mark.parametrize("mm", [cummin, cummax]) +def test_cumminmax_non_numeric_by(mm): + DT = dt.Frame(list('abcde')) + with pytest.raises(TypeError, + match = r'Invalid column of type str32 in ' + mm.__name__): + DT[:, mm(f[0]), by(f[0])] + + +@pytest.mark.parametrize("mm", [cummin, cummax]) +def test_cumminmax_no_argument(mm): + msg = (f"Function datatable.{mm.__name__}" + "\\(\\) requires exactly 1 positional argument, but none were given") + with pytest.raises(TypeError, match = msg): + mm() + + +#------------------------------------------------------------------------------- +# Normal +#------------------------------------------------------------------------------- + +@pytest.mark.parametrize("mm", [cummin, cummax]) +def test_cumminmax_str(mm): + assert str(mm(f.A)) == "FExpr<" + mm.__name__ + "(f.A)>" + assert str(mm(f.A) + 1) == "FExpr<" + mm.__name__ + "(f.A) + 1>" + assert str(mm(f.A + f.B)) == "FExpr<" + mm.__name__ + "(f.A + f.B)>" + assert str(mm(f.B)) == "FExpr<" + mm.__name__ + "(f.B)>" + assert str(mm(f[:2])) == "FExpr<"+ mm.__name__ + "(f[:2])>" + + +@pytest.mark.parametrize("mm", [cummin, cummax]) +def test_cumminmax_empty_frame(mm): + DT = dt.Frame() + expr_mm = mm(DT) + assert isinstance(expr_mm, FExpr) + assert_equals(DT[:, mm(f[:])], DT) + + +@pytest.mark.parametrize("mm", [cummin, cummax]) +def test_cumminmax_void(mm): + DT = 
dt.Frame([None, None, None]) + DT_mm = DT[:, mm(f[:])] + assert_equals(DT_mm, DT) + + +@pytest.mark.parametrize("mm", [cummin, cummax]) +def test_cumminmax_trivial(mm): + DT = dt.Frame([0]/dt.int64) + mm_fexpr = mm(f[:]) + DT_mm = DT[:, mm_fexpr] + assert isinstance(mm_fexpr, FExpr) + assert_equals(DT, DT_mm) + + +def test_cumminmax_bool(): + DT = dt.Frame([None, False, None, True, False, True]) + DT_mm = DT[:, [cummin(f[:]), cummax(f[:])]] + DT_ref = dt.Frame([ + [None, False, False, False, False, False], + [None, False, False, True, True, True] + ]) + assert_equals(DT_mm, DT_ref) + + +def test_cumminmax_small(): + DT = dt.Frame([range(5), [None, -1, None, 5.5, 3]]) + DT_cummax = DT[:, [cummin(f[:]), cummax(f[:])]] + DT_ref = dt.Frame([ + [0, 0, 0, 0, 0]/dt.int32, + [None, -1, -1, -1, -1]/dt.float64, + [0, 1, 2, 3, 4], + [None, -1, -1, 5.5, 5.5] + ]) + assert_equals(DT_cummax, DT_ref) + + +def test_cumminmax_groupby(): + DT = dt.Frame([[2, 1, 1, 1, 2], [1.5, -1.5, math.inf, None, 3]]) + DT_cummax = DT[:, [cummin(f[:]), cummax(f[:])], by(f[0])] + DT_ref = dt.Frame([ + [1, 1, 1, 2, 2], + [-1.5, -1.5, -1.5, 1.5, 1.5], + [-1.5, math.inf, math.inf, 1.5, 3] + ]) + assert_equals(DT_cummax, DT_ref) + + +def test_cumminmax_grouped_column(): + DT = dt.Frame([2, 1, None, 1, 2]) + DT_cummax = DT[:, [cummin(f[0]), cummax(f[0])], by(f[0])] + DT_ref = dt.Frame([ + [None, 1, 1, 2, 2], + [None, 1, 1, 2, 2], + [None, 1, 1, 2, 2] + ]) + assert_equals(DT_cummax, DT_ref) + + diff --git a/tests/dt/test-cumsum.py b/tests/dt/test-cumsum.py index 92ed3defbe..e67cbb1008 100644 --- a/tests/dt/test-cumsum.py +++ b/tests/dt/test-cumsum.py @@ -64,7 +64,7 @@ def test_cumsum_empty_frame(): DT = dt.Frame() expr_cumsum = cumsum(DT) assert isinstance(expr_cumsum, FExpr) - assert_equals(DT[:, f[:]], DT) + assert_equals(DT[:, cumsum(f[:])], DT) def test_cumsum_void(): diff --git a/tests/dt/test-qcut.py b/tests/dt/test-qcut.py index dbd0f65cc8..47904ab7bd 100644 --- a/tests/dt/test-qcut.py +++ 
b/tests/dt/test-qcut.py @@ -108,7 +108,7 @@ def test_qcut_empty_frame(): DT = dt.Frame() expr_qcut = qcut(DT) assert isinstance(expr_qcut, FExpr) - assert_equals(DT[:, f[:]], DT) + assert_equals(DT[:, qcut(f[:])], DT) def test_qcut_zerorow_frame(): From 24cbf53899ca0ff4b8c53a815791c12abb9c6b3a Mon Sep 17 00:00:00 2001 From: Oleksiy Kononenko Date: Thu, 9 Jun 2022 20:39:53 -0700 Subject: [PATCH 22/26] Fix docs --- docs/api/dt/cummax.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/api/dt/cummax.rst b/docs/api/dt/cummax.rst index 4ac14e2f44..8a7942c647 100644 --- a/docs/api/dt/cummax.rst +++ b/docs/api/dt/cummax.rst @@ -1,7 +1,7 @@ .. xfunction:: datatable.cummax - :src: src/core/expr/fexpr_cummax.cc pyfn_cummax - :tests: tests/dt/test-cummax.py + :src: src/core/expr/fexpr_cumminmax.cc pyfn_cummax + :tests: tests/dt/test-cumminmax.py :cvar: doc_dt_cummax :signature: cummax(cols) From 69f824dec46002e45ed860e6eccdd068140c3e20 Mon Sep 17 00:00:00 2001 From: Oleksiy Kononenko Date: Thu, 9 Jun 2022 21:24:17 -0700 Subject: [PATCH 23/26] docs: per group -> within each group --- docs/api/dt/cummax.rst | 4 ++-- docs/api/dt/cummin.rst | 4 ++-- docs/api/dt/cumsum.rst | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/api/dt/cummax.rst b/docs/api/dt/cummax.rst index 8a7942c647..779b2f8a67 100644 --- a/docs/api/dt/cummax.rst +++ b/docs/api/dt/cummax.rst @@ -8,7 +8,7 @@ .. x-version-added:: 1.1.0 For each column from `cols` calculate cumulative maximum. In the presence of :func:`by()`, - the cumulative maximum is computed per group. + the cumulative maximum is computed within each group. 
Parameters ---------- @@ -73,7 +73,7 @@ [5 rows x 3 columns] - Calculate the cumulative maximum per group in the presence of :func:`by()`:: + In the presence of :func:`by()` calculate the cumulative maximum within each group:: >>> DT[:, dt.cummax(f[:]), by('D')] | D A B C diff --git a/docs/api/dt/cummin.rst b/docs/api/dt/cummin.rst index 19c5f05431..7624dbfc06 100644 --- a/docs/api/dt/cummin.rst +++ b/docs/api/dt/cummin.rst @@ -8,7 +8,7 @@ .. x-version-added:: 1.1.0 For each column from `cols` calculate cumulative minimum. In the presence of :func:`by()`, - the cumulative minimum is computed per group. + the cumulative minimum is computed within each group. Parameters ---------- @@ -73,7 +73,7 @@ [5 rows x 3 columns] - Calculate the cumulative maximum per group in the presence of :func:`by()`:: + In the presence of :func:`by()` calculate the cumulative maximum within each group:: >>> DT[:, dt.cummax(f[:]), by('D')] | D A B C diff --git a/docs/api/dt/cumsum.rst b/docs/api/dt/cumsum.rst index 26a4096dbc..0a85a627b9 100644 --- a/docs/api/dt/cumsum.rst +++ b/docs/api/dt/cumsum.rst @@ -9,7 +9,7 @@ For each column from `cols` calculate cumulative sum. The sum of the missing values is calculated as zero. In the presence of :func:`by()`, - the cumulative sum is computed per group. + the cumulative sum is computed within each group. 
Parameters ---------- @@ -74,7 +74,7 @@ [5 rows x 3 columns] - Calculate cumulative sums per group in the presence of :func:`by()`:: + In the presence of :func:`by()` calculate cumulative sums within each group:: >>> DT[:, dt.cumsum(f[:]), by('D')] | D A B C From 9ddfa7d655829758bf082ca10713f5cd5ead9172 Mon Sep 17 00:00:00 2001 From: sammychoco Date: Sat, 11 Jun 2022 02:50:41 +1000 Subject: [PATCH 24/26] update examples for cummin --- docs/api/dt/cummax.rst | 6 +++--- docs/api/dt/cummin.rst | 42 +++++++++++++++++++++--------------------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/docs/api/dt/cummax.rst b/docs/api/dt/cummax.rst index 779b2f8a67..c34aed5020 100644 --- a/docs/api/dt/cummax.rst +++ b/docs/api/dt/cummax.rst @@ -49,7 +49,7 @@ >>> DT[:, dt.cummax(f.A)] | A - | int64 + | int32 -- + ----- 0 | 2 1 | 2 @@ -63,7 +63,7 @@ >>> DT[:, dt.cummax(f[:-1])] | A B C - | int64 void float64 + | int32 void float64 -- + ----- ---- ------- 0 | 2 NA 5.4 1 | 2 NA 5.4 @@ -77,7 +77,7 @@ >>> DT[:, dt.cummax(f[:]), by('D')] | D A B C - | str32 int64 void float64 + | str32 int32 void float64 -- + ----- ----- ---- ------- 0 | a 2 NA 5.4 1 | a 2 NA 5.4 diff --git a/docs/api/dt/cummin.rst b/docs/api/dt/cummin.rst index 7624dbfc06..2d9876b347 100644 --- a/docs/api/dt/cummin.rst +++ b/docs/api/dt/cummin.rst @@ -45,43 +45,43 @@ [5 rows x 4 columns] - Calculate the cumulative maximum in a single column:: + Calculate the cumulative minimum in a single column:: - >>> DT[:, dt.cummax(f.A)] + >>> DT[:, dt.cummin(f.A)] | A - | int64 + | int32 -- + ----- 0 | 2 1 | 2 - 2 | 5 - 3 | 5 - 4 | 5 + 2 | 2 + 3 | -1 + 4 | -1 [5 rows x 1 column] - Calculate the cumulative maximum in multiple columns:: + Calculate the cumulative minimum in multiple columns:: - >>> DT[:, dt.cummax(f[:-1])] + >>> DT[:, dt.cummin(f[:-1])] | A B C - | int64 void float64 + | int32 void float64 -- + ----- ---- ------- 0 | 2 NA 5.4 - 1 | 2 NA 5.4 - 2 | 5 NA 5.4 - 3 | 5 NA 5.4 - 4 | 5 NA 5.4 + 1 | 2 NA 3 + 2 
| 2 NA 2.2 + 3 | -1 NA 2.2 + 4 | -1 NA 2.2 [5 rows x 3 columns] - In the presence of :func:`by()` calculate the cumulative maximum within each group:: + In the presence of :func:`by()` calculate the cumulative minimum within each group:: - >>> DT[:, dt.cummax(f[:]), by('D')] + >>> DT[:, dt.cummin(f[:]), by('D')] | D A B C - | str32 int64 void float64 + | str32 int32 void float64 -- + ----- ----- ---- ------- - 0 | a 2 NA 5.4 - 1 | a 2 NA 5.4 - 2 | b 5 NA 2.2 - 3 | b 5 NA 4.323 - 4 | b 5 NA 4.323 + 0 | a 2 NA 5.4 + 1 | a 2 NA 3 + 2 | b 5 NA 2.2 + 3 | b -1 NA 2.2 + 4 | b -1 NA 2.2 [5 rows x 4 columns] From 035e46fd6aafa152ec66e09a51ebea1a8d1f86d2 Mon Sep 17 00:00:00 2001 From: sammychoco Date: Sat, 11 Jun 2022 02:53:24 +1000 Subject: [PATCH 25/26] add test for f.cummin --- tests/test-f.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/test-f.py b/tests/test-f.py index 9a4edb2381..873c0ad1fa 100644 --- a/tests/test-f.py +++ b/tests/test-f.py @@ -451,3 +451,8 @@ def test_cummax(): DT = dt.Frame(A = [9, 8, 2, 3, None, None, 3, 0, 5, 5, 8, None, 1]) assert_equals(DT[:, f.A.cummax()], DT[:, dt.cummax(f.A)]) +def test_cummin(): + assert str(dt.cummin(f.A)) == str(f.A.cummin()) + assert str(dt.cummin(f[:])) == str(f[:].cummin()) + DT = dt.Frame(A = [9, 8, 2, 3, None, None, 3, 0, 5, 5, 8, None, 1]) + assert_equals(DT[:, f.A.cummin()], DT[:, dt.cummin(f.A)]) \ No newline at end of file From 184a11ea8c20e1e76494078e3795d71c3082dba8 Mon Sep 17 00:00:00 2001 From: sammychoco Date: Sat, 11 Jun 2022 12:20:15 +1000 Subject: [PATCH 26/26] updates based on feedback --- docs/api/index-api.rst | 4 +- docs/releases/v1.1.0.rst | 3 - src/core/documentation.h | 4 +- src/core/expr/fexpr.cc | 241 ++++++++++++++++++-------------------- src/datatable/__init__.py | 4 +- tests/test-f.py | 3 +- 6 files changed, 125 insertions(+), 134 deletions(-) diff --git a/docs/api/index-api.rst b/docs/api/index-api.rst index 48bcf3633d..8ad197244f 100644 --- a/docs/api/index-api.rst +++ 
b/docs/api/index-api.rst @@ -165,6 +165,8 @@ Functions - Count the number of NA values per column * - :func:`cummax()` - Calculate the cumulative maximum of values per column + * - :func:`cummin()` + - Calculate the cumulative minimum of values per column * - :func:`cumsum()` - Calculate the cumulative sum of values per column * - :func:`cov()` @@ -236,8 +238,8 @@ Other count()
countna()
cov()
- cummin()
cummax()
+ cummin()
cumsum()
cut()
dt
diff --git a/docs/releases/v1.1.0.rst b/docs/releases/v1.1.0.rst index 3de13c798f..8566e16c5d 100644 --- a/docs/releases/v1.1.0.rst +++ b/docs/releases/v1.1.0.rst @@ -101,9 +101,6 @@ -[fix] Reducers and row-wise functions now support :attr:`void ` columns. [#3284] - -[fix] All the row-wise functions now support :attr:`void ` - columns. [#3284] - fread ----- diff --git a/src/core/documentation.h b/src/core/documentation.h index 57b918e6ec..8d4a52f391 100644 --- a/src/core/documentation.h +++ b/src/core/documentation.h @@ -30,8 +30,8 @@ extern const char* doc_dt_corr; extern const char* doc_dt_count; extern const char* doc_dt_countna; extern const char* doc_dt_cov; -extern const char* doc_dt_cummin; extern const char* doc_dt_cummax; +extern const char* doc_dt_cummin; extern const char* doc_dt_cumsum; extern const char* doc_dt_cut; extern const char* doc_dt_first; @@ -281,8 +281,8 @@ extern const char* doc_FExpr; extern const char* doc_FExpr_as_type; extern const char* doc_FExpr_count; extern const char* doc_FExpr_countna; -extern const char* doc_FExpr_cummin; extern const char* doc_FExpr_cummax; +extern const char* doc_FExpr_cummin; extern const char* doc_FExpr_cumsum; extern const char* doc_FExpr_extend; extern const char* doc_FExpr_first; diff --git a/src/core/expr/fexpr.cc b/src/core/expr/fexpr.cc index 6fc068d5fe..53e89b049c 100644 --- a/src/core/expr/fexpr.cc +++ b/src/core/expr/fexpr.cc @@ -295,26 +295,88 @@ DECLARE_METHOD(&PyFExpr::re_match) // Miscellaneous //------------------------------------------------------------------------------ -oobj PyFExpr::sum(const XArgs&) { - auto sumFn = oobj::import("datatable", "sum"); - return sumFn.call({this}); + +oobj PyFExpr::as_type(const XArgs& args) { + auto as_typeFn = oobj::import("datatable", "as_type"); + oobj new_type = args[0].to_oobj(); + return as_typeFn.call({this, new_type}); } -DECLARE_METHOD(&PyFExpr::sum) - ->name("sum") - ->docs(dt::doc_FExpr_sum); +DECLARE_METHOD(&PyFExpr::as_type) + ->name("as_type") + 
->docs(dt::doc_FExpr_as_type) + ->arg_names({"new_type"}) + ->n_positional_args(1) + ->n_required_args(1); -oobj PyFExpr::prod(const XArgs&) { - auto prodFn = oobj::import("datatable", "prod"); - return prodFn.call({this}); +oobj PyFExpr::count(const XArgs&) { + auto countFn = oobj::import("datatable", "count"); + return countFn.call({this}); } -DECLARE_METHOD(&PyFExpr::prod) - ->name("prod") - ->docs(dt::doc_FExpr_prod); +DECLARE_METHOD(&PyFExpr::count) + ->name("count") + ->docs(dt::doc_FExpr_count); + + +oobj PyFExpr::countna(const XArgs&) { + auto countnaFn = oobj::import("datatable", "countna"); + return countnaFn.call({this}); +} + +DECLARE_METHOD(&PyFExpr::countna) + ->name("countna") + ->docs(dt::doc_FExpr_countna); + +oobj PyFExpr::cummax(const XArgs&) { + auto cummaxFn = oobj::import("datatable", "cummax"); + return cummaxFn.call({this}); +} + +DECLARE_METHOD(&PyFExpr::cummax) + ->name("cummax") + ->docs(dt::doc_FExpr_cummax); + +oobj PyFExpr::cummin(const XArgs&) { + auto cumminFn = oobj::import("datatable", "cummin"); + return cumminFn.call({this}); +} + +DECLARE_METHOD(&PyFExpr::cummin) + ->name("cummin") + ->docs(dt::doc_FExpr_cummin); + + +oobj PyFExpr::cumsum(const XArgs&) { + auto cumsumFn = oobj::import("datatable", "cumsum"); + return cumsumFn.call({this}); +} + +DECLARE_METHOD(&PyFExpr::cumsum) + ->name("cumsum") + ->docs(dt::doc_FExpr_cumsum); + + +oobj PyFExpr::first(const XArgs&) { + auto firstFn = oobj::import("datatable", "first"); + return firstFn.call({this}); +} + +DECLARE_METHOD(&PyFExpr::first) + ->name("first") + ->docs(dt::doc_FExpr_first); +oobj PyFExpr::last(const XArgs&) { + auto lastFn = oobj::import("datatable", "last"); + return lastFn.call({this}); +} + +DECLARE_METHOD(&PyFExpr::last) + ->name("last") + ->docs(dt::doc_FExpr_last); + oobj PyFExpr::max(const XArgs&) { auto maxFn = oobj::import("datatable", "max"); return maxFn.call({this}); @@ -358,6 +420,25 @@ DECLARE_METHOD(&PyFExpr::min) ->docs(dt::doc_FExpr_min); +oobj 
PyFExpr::nunique(const XArgs&) { + auto nuniqueFn = oobj::import("datatable", "nunique"); + return nuniqueFn.call({this}); +} + +DECLARE_METHOD(&PyFExpr::nunique) + ->name("nunique") + ->docs(dt::doc_FExpr_nunique); + +oobj PyFExpr::prod(const XArgs&) { + auto prodFn = oobj::import("datatable", "prod"); + return prodFn.call({this}); +} + +DECLARE_METHOD(&PyFExpr::prod) + ->name("prod") + ->docs(dt::doc_FExpr_prod); + + oobj PyFExpr::rowall(const XArgs&) { auto rowallFn = oobj::import("datatable", "rowall"); @@ -379,6 +460,23 @@ DECLARE_METHOD(&PyFExpr::rowany) ->name("rowany") ->docs(dt::doc_FExpr_rowany); +oobj PyFExpr::rowargmax(const XArgs&) { + auto rowargmaxFn = oobj::import("datatable", "rowargmax"); + return rowargmaxFn.call({this}); +} + +DECLARE_METHOD(&PyFExpr::rowargmax) + ->name("rowargmax") + ->docs(dt::doc_FExpr_rowargmax); + +oobj PyFExpr::rowargmin(const XArgs&) { + auto rowargminFn = oobj::import("datatable", "rowargmin"); + return rowargminFn.call({this}); +} + +DECLARE_METHOD(&PyFExpr::rowargmin) + ->name("rowargmin") + ->docs(dt::doc_FExpr_rowargmin); oobj PyFExpr::rowcount(const XArgs&) { @@ -413,14 +511,6 @@ DECLARE_METHOD(&PyFExpr::rowlast) ->docs(dt::doc_FExpr_rowlast); -oobj PyFExpr::rowargmax(const XArgs&) { - auto rowargmaxFn = oobj::import("datatable", "rowargmax"); - return rowargmaxFn.call({this}); -} - -DECLARE_METHOD(&PyFExpr::rowargmax) - ->name("rowargmax") - ->docs(dt::doc_FExpr_rowargmax); oobj PyFExpr::rowmax(const XArgs&) { @@ -444,15 +534,6 @@ DECLARE_METHOD(&PyFExpr::rowmean) ->docs(dt::doc_FExpr_rowmean); -oobj PyFExpr::rowargmin(const XArgs&) { - auto rowargminFn = oobj::import("datatable", "rowargmin"); - return rowargminFn.call({this}); -} - -DECLARE_METHOD(&PyFExpr::rowargmin) - ->name("rowargmin") - ->docs(dt::doc_FExpr_rowargmin); - oobj PyFExpr::rowmin(const XArgs&) { auto rowminFn = oobj::import("datatable", "rowmin"); @@ -464,7 +545,6 @@ DECLARE_METHOD(&PyFExpr::rowmin) ->docs(dt::doc_FExpr_rowmin); - oobj 
PyFExpr::rowsd(const XArgs&) { auto rowsdFn = oobj::import("datatable", "rowsd"); return rowsdFn.call({this}); @@ -486,7 +566,6 @@ DECLARE_METHOD(&PyFExpr::rowsum) ->docs(dt::doc_FExpr_rowsum); - oobj PyFExpr::sd(const XArgs&) { auto sdFn = oobj::import("datatable", "sd"); return sdFn.call({this}); @@ -497,7 +576,6 @@ DECLARE_METHOD(&PyFExpr::sd) ->docs(dt::doc_FExpr_sd); - oobj PyFExpr::shift(const XArgs& args) { auto shiftFn = oobj::import("datatable", "shift"); oobj n = args[0]? args[0].to_oobj() : py::oint(1); @@ -510,103 +588,16 @@ DECLARE_METHOD(&PyFExpr::shift) ->arg_names({"n"}) ->n_positional_or_keyword_args(1); - - -oobj PyFExpr::last(const XArgs&) { - auto lastFn = oobj::import("datatable", "last"); - return lastFn.call({this}); -} - -DECLARE_METHOD(&PyFExpr::last) - ->name("last") - ->docs(dt::doc_FExpr_last); - - - -oobj PyFExpr::count(const XArgs&) { - auto countFn = oobj::import("datatable", "count"); - return countFn.call({this}); -} - -DECLARE_METHOD(&PyFExpr::count) - ->name("count") - ->docs(dt::doc_FExpr_count); - - - -oobj PyFExpr::first(const XArgs&) { - auto firstFn = oobj::import("datatable", "first"); - return firstFn.call({this}); -} - -DECLARE_METHOD(&PyFExpr::first) - ->name("first") - ->docs(dt::doc_FExpr_first); - - -oobj PyFExpr::as_type(const XArgs& args) { - auto as_typeFn = oobj::import("datatable", "as_type"); - oobj new_type = args[0].to_oobj(); - return as_typeFn.call({this, new_type}); -} - - -DECLARE_METHOD(&PyFExpr::as_type) - ->name("as_type") - ->docs(dt::doc_FExpr_as_type) - ->arg_names({"new_type"}) - ->n_positional_args(1) - ->n_required_args(1); - - -oobj PyFExpr::nunique(const XArgs&) { - auto nuniqueFn = oobj::import("datatable", "nunique"); - return nuniqueFn.call({this}); -} - -DECLARE_METHOD(&PyFExpr::nunique) - ->name("nunique") - ->docs(dt::doc_FExpr_nunique); - - -oobj PyFExpr::countna(const XArgs&) { - auto countnaFn = oobj::import("datatable", "countna"); - return countnaFn.call({this}); -} - 
-DECLARE_METHOD(&PyFExpr::countna) - ->name("countna") - ->docs(dt::doc_FExpr_countna); - - -oobj PyFExpr::cumsum(const XArgs&) { - auto cumsumFn = oobj::import("datatable", "cumsum"); - return cumsumFn.call({this}); -} - -DECLARE_METHOD(&PyFExpr::cumsum) - ->name("cumsum") - ->docs(dt::doc_FExpr_cumsum); - - -oobj PyFExpr::cummin(const XArgs&) { - auto cumminFn = oobj::import("datatable", "cummin"); - return cumminFn.call({this}); +oobj PyFExpr::sum(const XArgs&) { + auto sumFn = oobj::import("datatable", "sum"); + return sumFn.call({this}); } -DECLARE_METHOD(&PyFExpr::cummin) - ->name("cummin") - ->docs(dt::doc_FExpr_cummin); +DECLARE_METHOD(&PyFExpr::sum) + ->name("sum") + ->docs(dt::doc_FExpr_sum); -oobj PyFExpr::cummax(const XArgs&) { - auto cummaxFn = oobj::import("datatable", "cummax"); - return cummaxFn.call({this}); -} - -DECLARE_METHOD(&PyFExpr::cummax) - ->name("cummax") - ->docs(dt::doc_FExpr_cummax); //------------------------------------------------------------------------------ // Class decoration diff --git a/src/datatable/__init__.py b/src/datatable/__init__.py index 4cb7b03a36..459ad6981d 100644 --- a/src/datatable/__init__.py +++ b/src/datatable/__init__.py @@ -27,8 +27,8 @@ as_type, by, cbind, - cummin, cummax, + cummin, cumsum, cut, fread, @@ -87,8 +87,8 @@ "corr", "count", "cov", - "cummin", "cummax", + "cummin", "cumsum", "cut", "dt", diff --git a/tests/test-f.py b/tests/test-f.py index 873c0ad1fa..f26d6c5155 100644 --- a/tests/test-f.py +++ b/tests/test-f.py @@ -455,4 +455,5 @@ def test_cummin(): assert str(dt.cummin(f.A)) == str(f.A.cummin()) assert str(dt.cummin(f[:])) == str(f[:].cummin()) DT = dt.Frame(A = [9, 8, 2, 3, None, None, 3, 0, 5, 5, 8, None, 1]) - assert_equals(DT[:, f.A.cummin()], DT[:, dt.cummin(f.A)]) \ No newline at end of file + assert_equals(DT[:, f.A.cummin()], DT[:, dt.cummin(f.A)]) +