From 58507fdd11da955480de131561f6dcc03c9d740d Mon Sep 17 00:00:00 2001 From: Samuel Oranyeli Date: Sun, 20 Nov 2022 12:25:37 +1100 Subject: [PATCH] [ENH] Add `reverse` parameter to `cumsum()`, `cumprod()`, `cummin()` and `cummax()` (#3381) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add `reverse` parameter to control direction of cumulative function's calculations: - when `False`, calculation is done from top to bottom (default); - when `True`, calculation is done from bottom to top. Closes #3279 --- docs/api/dt/cummax.rst | 26 +++++++++++++--- docs/api/dt/cummin.rst | 22 ++++++++++++-- docs/api/dt/cumprod.rst | 22 ++++++++++++-- docs/api/dt/cumsum.rst | 22 ++++++++++++-- src/core/column/cumminmax.h | 50 +++++++++++++++++++++---------- src/core/column/cumsumprod.h | 36 +++++++++++++++++----- src/core/expr/fexpr.cc | 42 ++++++++++++++++++-------- src/core/expr/fexpr_cumminmax.cc | 26 ++++++++++++---- src/core/expr/fexpr_cumsumprod.cc | 28 +++++++++++++---- tests/dt/test-cumminmax.py | 33 ++++++++++++++++---- tests/dt/test-cumprod.py | 28 +++++++++++++---- tests/dt/test-cumsum.py | 28 +++++++++++++---- tests/test-f.py | 9 ++++++ 13 files changed, 298 insertions(+), 74 deletions(-) diff --git a/docs/api/dt/cummax.rst b/docs/api/dt/cummax.rst index 938abd22bd..df6d1c23ed 100644 --- a/docs/api/dt/cummax.rst +++ b/docs/api/dt/cummax.rst @@ -3,18 +3,22 @@ :src: src/core/expr/fexpr_cumminmax.cc pyfn_cummax :tests: tests/dt/test-cumminmax.py :cvar: doc_dt_cummax - :signature: cummax(cols) + :signature: cummax(cols, reverse=False) .. x-version-added:: 1.1.0 - For each column from `cols` calculate cumulative maximum. In the presence of :func:`by()`, - the cumulative maximum is computed within each group. + For each column from `cols` calculate cumulative maximum. In the presence + of :func:`by()`, the cumulative maximum is computed within each group. Parameters ---------- cols: FExpr Input data for cumulative maximum calculation. 
+ reverse: bool + If ``False``, computation is done from top to bottom. + If ``True``, it is done from bottom to top. + return: FExpr f-expression that converts input columns into the columns filled with the respective cumulative maximums. @@ -57,6 +61,20 @@ 3 | 5 4 | 5 [5 rows x 1 column] + + + Calculate the cumulative maximum from bottom to top:: + + >>> DT[:, dt.cummax(f.A, reverse=True)] + | A + | int32 + -- + ----- + 0 | 5 + 1 | 5 + 2 | 5 + 3 | 0 + 4 | 0 + [5 rows x 1 column] Calculate the cumulative maximum in multiple columns:: @@ -73,7 +91,7 @@ [5 rows x 3 columns] - In the presence of :func:`by()` calculate the cumulative maximum within each group:: + For a grouped frame calculate the cumulative maximum within each group:: >>> DT[:, dt.cummax(f[:]), by('D')] | D A B C diff --git a/docs/api/dt/cummin.rst b/docs/api/dt/cummin.rst index b17d8afabc..f6754f273c 100644 --- a/docs/api/dt/cummin.rst +++ b/docs/api/dt/cummin.rst @@ -3,7 +3,7 @@ :src: src/core/expr/fexpr_cumminmax.cc pyfn_cummin :tests: tests/dt/test-cumminmax.py :cvar: doc_dt_cummin - :signature: cummin(cols) + :signature: cummin(cols, reverse=False) .. x-version-added:: 1.1.0 @@ -15,6 +15,10 @@ cols: FExpr Input data for cumulative minimum calculation. + reverse: bool + If ``False``, computation is done from top to bottom. + If ``True``, it is done from bottom to top. + return: FExpr f-expression that converts input columns into the columns filled with the respective cumulative minimums. 
@@ -57,6 +61,20 @@ 3 | -1 4 | -1 [5 rows x 1 column] + + + Calculate the cumulative minimum from bottom to top:: + + >>> DT[:, dt.cummin(f.A, reverse=True)] + | A + | int32 + -- + ----- + 0 | -1 + 1 | -1 + 2 | -1 + 3 | -1 + 4 | 0 + [5 rows x 1 column] Calculate the cumulative minimum in multiple columns:: @@ -73,7 +91,7 @@ [5 rows x 3 columns] - In the presence of :func:`by()` calculate the cumulative minimum within each group:: + For a grouped frame calculate the cumulative minimum within each group:: >>> DT[:, dt.cummin(f[:]), by('D')] | D A B C diff --git a/docs/api/dt/cumprod.rst b/docs/api/dt/cumprod.rst index 3b10177f11..5ede171b93 100644 --- a/docs/api/dt/cumprod.rst +++ b/docs/api/dt/cumprod.rst @@ -3,7 +3,7 @@ :src: src/core/expr/fexpr_cumsumprod.cc pyfn_cumprod :tests: tests/dt/test-cumprod.py :cvar: doc_dt_cumprod - :signature: cumprod(cols) + :signature: cumprod(cols, reverse=False) .. x-version-added:: 1.1.0 @@ -16,6 +16,10 @@ cols: FExpr Input data for cumulative product calculation. + reverse: bool + If ``False``, computation is done from top to bottom. + If ``True``, it is done from bottom to top. + return: FExpr f-expression that converts input columns into the columns filled with the respective cumulative products. 
@@ -58,6 +62,20 @@ 3 | -10 4 | 0 [5 rows x 1 column] + + + Calculate the cumulative product from bottom to top:: + + >>> DT[:, dt.cumprod(f.A, reverse=True)] + | A + | int64 + -- + ----- + 0 | 0 + 1 | 0 + 2 | 0 + 3 | 0 + 4 | 0 + [5 rows x 1 column] Calculate cumulative products in multiple columns:: @@ -74,7 +92,7 @@ [5 rows x 3 columns] - In the presence of :func:`by()` calculate cumulative products within each group:: + For a grouped frame calculate cumulative products within each group:: >>> DT[:, dt.cumprod(f[:]), by('D')] | D A B C diff --git a/docs/api/dt/cumsum.rst b/docs/api/dt/cumsum.rst index d3bbb72863..88c9b26c20 100644 --- a/docs/api/dt/cumsum.rst +++ b/docs/api/dt/cumsum.rst @@ -3,7 +3,7 @@ :src: src/core/expr/fexpr_cumsumprod.cc pyfn_cumsum :tests: tests/dt/test-cumsum.py :cvar: doc_dt_cumsum - :signature: cumsum(cols) + :signature: cumsum(cols, reverse=False) .. x-version-added:: 1.1.0 @@ -16,6 +16,10 @@ cols: FExpr Input data for cumulative sum calculation. + reverse: bool + If ``False``, computation is done from top to bottom. + If ``True``, it is done from bottom to top. + return: FExpr f-expression that converts input columns into the columns filled with the respective cumulative sums. 
@@ -60,6 +64,20 @@ [5 rows x 1 column] + Calculate the cumulative sum from bottom to top:: + + >>> DT[:, dt.cumsum(f.A, reverse=True)] + | A + | int64 + -- + ----- + 0 | 6 + 1 | 4 + 2 | 4 + 3 | -1 + 4 | 0 + [5 rows x 1 column] + + Calculate cumulative sums in multiple columns:: >>> DT[:, dt.cumsum(f[:-1])] @@ -74,7 +92,7 @@ [5 rows x 3 columns] - In the presence of :func:`by()` calculate cumulative sums within each group:: + For a grouped frame calculate cumulative sums within each group:: >>> DT[:, dt.cumsum(f[:]), by('D')] | D A B C diff --git a/src/core/column/cumminmax.h b/src/core/column/cumminmax.h index 9511b699f6..a0e3a1a320 100644 --- a/src/core/column/cumminmax.h +++ b/src/core/column/cumminmax.h @@ -29,7 +29,7 @@ namespace dt { -template +template class CumMinMax_ColumnImpl : public Virtual_ColumnImpl { private: Column col_; @@ -57,25 +57,43 @@ class CumMinMax_ColumnImpl : public Virtual_ColumnImpl { [&](size_t gi) { size_t i1 = size_t(offsets[gi]); size_t i2 = size_t(offsets[gi + 1]); - T val; - bool res_valid = col_.get_element(i1, &val); - data[i1] = res_valid? val : GETNA(); - - for (size_t i = i1 + 1; i < i2; ++i) { - bool is_valid = col_.get_element(i, &val); - if (is_valid) { - if (MIN) { - data[i] = (res_valid && data[i - 1] < val)? data[i - 1] : val; + + if (REVERSE) { + bool res_valid = col_.get_element(i2 - 1, &val); + data[i2 - 1] = res_valid? val : GETNA(); + + for (size_t i = i2 - 1; i-- > i1;) { + bool is_valid = col_.get_element(i, &val); + if (is_valid) { + if (MIN) { + data[i] = (res_valid && data[i + 1] < val)? data[i + 1] : val; + } else { + data[i] = (res_valid && data[i + 1] > val)? data[i + 1] : val; + } + res_valid = true; } else { - data[i] = (res_valid && data[i - 1] > val)? data[i - 1] : val; + data[i] = data[i + 1]; + } + } + } else { + bool res_valid = col_.get_element(i1, &val); + data[i1] = res_valid? 
val : GETNA(); + + for (size_t i = i1 + 1; i < i2; ++i) { + bool is_valid = col_.get_element(i, &val); + if (is_valid) { + if (MIN) { + data[i] = (res_valid && data[i - 1] < val)? data[i - 1] : val; + } else { + data[i] = (res_valid && data[i - 1] > val)? data[i - 1] : val; + } + res_valid = true; + } else { + data[i] = data[i - 1]; } - res_valid = true; - } else { - data[i] = data[i - 1]; } } - }); @@ -87,10 +105,12 @@ class CumMinMax_ColumnImpl : public Virtual_ColumnImpl { return new CumMinMax_ColumnImpl(Column(col_), gby_); } + size_t n_children() const noexcept override { return 1; } + const Column& child(size_t i) const override { xassert(i == 0); (void)i; return col_; diff --git a/src/core/column/cumsumprod.h b/src/core/column/cumsumprod.h index 019b07d630..aca71f9f0a 100644 --- a/src/core/column/cumsumprod.h +++ b/src/core/column/cumsumprod.h @@ -29,7 +29,7 @@ namespace dt { - template + template class CumSumProd_ColumnImpl : public Virtual_ColumnImpl { private: Column col_; @@ -44,6 +44,7 @@ namespace dt { xassert(col_.can_be_read_as()); } + void materialize(Column &col_out, bool) override { Latent_ColumnImpl::vivify(col_); Column col = Column::new_data_column(col_.nrows(), col_.stype()); @@ -51,12 +52,28 @@ namespace dt { auto offsets = gby_.offsets_r(); dt::parallel_for_dynamic( - gby_.size(), - [&](size_t gi) { - size_t i1 = size_t(offsets[gi]); - size_t i2 = size_t(offsets[gi + 1]); + gby_.size(), + [&](size_t gi) { + size_t i1 = size_t(offsets[gi]); + size_t i2 = size_t(offsets[gi + 1]); + T val; - T val; + if (REVERSE) { + bool is_valid = col_.get_element(i2 - 1, &val); + if (SUM) { + data[i2 - 1] = is_valid? val : 0; + } else { + data[i2 - 1] = is_valid? val : 1; + } + for (size_t i = i2 - 1; i-- > i1;) { + is_valid = col_.get_element(i, &val); + if (SUM) { + data[i] = data[i + 1] + (is_valid? val : 0); + } else { + data[i] = data[i + 1] * (is_valid? 
val : 1); + } + } + } else { bool is_valid = col_.get_element(i1, &val); if (SUM) { data[i1] = is_valid? val : 0; @@ -71,19 +88,24 @@ namespace dt { data[i] = data[i - 1] * (is_valid? val : 1); } } - }); + } + } + ); col_out = std::move(col); } + ColumnImpl *clone() const override { return new CumSumProd_ColumnImpl(Column(col_), gby_); } + size_t n_children() const noexcept override { return 1; } + const Column &child(size_t i) const override { xassert(i == 0); (void)i; diff --git a/src/core/expr/fexpr.cc b/src/core/expr/fexpr.cc index 4ae36e5310..d337f3720a 100644 --- a/src/core/expr/fexpr.cc +++ b/src/core/expr/fexpr.cc @@ -392,42 +392,60 @@ DECLARE_METHOD(&PyFExpr::countna) ->name("countna") ->docs(dt::doc_FExpr_countna); -oobj PyFExpr::cummax(const XArgs&) { +oobj PyFExpr::cummax(const XArgs& args) { auto cummaxFn = oobj::import("datatable", "cummax"); - return cummaxFn.call({this}); + oobj reverse = args[0]? args[0].to_oobj() : py::obool(false); + return cummaxFn.call({this, reverse}); } DECLARE_METHOD(&PyFExpr::cummax) ->name("cummax") - ->docs(dt::doc_FExpr_cummax); + ->docs(dt::doc_FExpr_cummax) + ->arg_names({"reverse"}) + ->n_positional_or_keyword_args(1) + ->n_required_args(0); + -oobj PyFExpr::cummin(const XArgs&) { +oobj PyFExpr::cummin(const XArgs& args) { auto cumminFn = oobj::import("datatable", "cummin"); - return cumminFn.call({this}); + oobj reverse = args[0]? args[0].to_oobj() : py::obool(false); + return cumminFn.call({this, reverse}); } DECLARE_METHOD(&PyFExpr::cummin) ->name("cummin") - ->docs(dt::doc_FExpr_cummin); + ->docs(dt::doc_FExpr_cummin) + ->arg_names({"reverse"}) + ->n_positional_or_keyword_args(1) + ->n_required_args(0); -oobj PyFExpr::cumprod(const XArgs&) { + +oobj PyFExpr::cumprod(const XArgs& args) { auto cumprodFn = oobj::import("datatable", "cumprod"); - return cumprodFn.call({this}); + oobj reverse = args[0]? 
args[0].to_oobj() : py::obool(false); + return cumprodFn.call({this, reverse}); } DECLARE_METHOD(&PyFExpr::cumprod) ->name("cumprod") - ->docs(dt::doc_FExpr_cumprod); + ->docs(dt::doc_FExpr_cumprod) + ->arg_names({"reverse"}) + ->n_positional_or_keyword_args(1) + ->n_required_args(0); -oobj PyFExpr::cumsum(const XArgs&) { +oobj PyFExpr::cumsum(const XArgs& args) { auto cumsumFn = oobj::import("datatable", "cumsum"); - return cumsumFn.call({this}); + oobj reverse = args[0]? args[0].to_oobj() : py::obool(false); + return cumsumFn.call({this, reverse}); } DECLARE_METHOD(&PyFExpr::cumsum) ->name("cumsum") - ->docs(dt::doc_FExpr_cumsum); + ->docs(dt::doc_FExpr_cumsum) + ->arg_names({"reverse"}) + ->n_positional_or_keyword_args(1) + ->n_required_args(0); diff --git a/src/core/expr/fexpr_cumminmax.cc b/src/core/expr/fexpr_cumminmax.cc index 3dfb8bd964..ec8a3db55d 100644 --- a/src/core/expr/fexpr_cumminmax.cc +++ b/src/core/expr/fexpr_cumminmax.cc @@ -33,7 +33,7 @@ namespace dt { namespace expr { -template +template class FExpr_CumMinMax : public FExpr_Func { private: ptrExpr arg_; @@ -46,6 +46,8 @@ class FExpr_CumMinMax : public FExpr_Func { std::string out = MIN? "cummin" : "cummax"; out += '('; out += arg_->repr(); + out += ", reverse="; + out += REVERSE? 
"True" : "False"; out += ')'; return out; } @@ -103,7 +105,7 @@ class FExpr_CumMinMax : public FExpr_Func { template Column make(Column&& col, const Groupby& gby) const { return Column(new Latent_ColumnImpl( - new CumMinMax_ColumnImpl(std::move(col), gby) + new CumMinMax_ColumnImpl(std::move(col), gby) )); } }; @@ -111,29 +113,41 @@ class FExpr_CumMinMax : public FExpr_Func { static py::oobj pyfn_cummax(const py::XArgs& args) { auto cummax = args[0].to_oobj(); - return PyFExpr::make(new FExpr_CumMinMax(as_fexpr(cummax))); + bool reverse = args[1].to(false); + if (reverse) { + return PyFExpr::make(new FExpr_CumMinMax(as_fexpr(cummax))); + } else { + return PyFExpr::make(new FExpr_CumMinMax(as_fexpr(cummax))); + } } static py::oobj pyfn_cummin(const py::XArgs& args) { auto cummin = args[0].to_oobj(); - return PyFExpr::make(new FExpr_CumMinMax(as_fexpr(cummin))); + bool reverse = args[1].to(false); + if (reverse) { + return PyFExpr::make(new FExpr_CumMinMax(as_fexpr(cummin))); + } else { + return PyFExpr::make(new FExpr_CumMinMax(as_fexpr(cummin))); + } } DECLARE_PYFN(&pyfn_cummax) ->name("cummax") ->docs(doc_dt_cummax) - ->arg_names({"cummax"}) + ->arg_names({"cols", "reverse"}) ->n_positional_args(1) + ->n_positional_or_keyword_args(1) ->n_required_args(1); DECLARE_PYFN(&pyfn_cummin) ->name("cummin") ->docs(doc_dt_cummin) - ->arg_names({"cummin"}) + ->arg_names({"cols", "reverse"}) ->n_positional_args(1) + ->n_positional_or_keyword_args(1) ->n_required_args(1); diff --git a/src/core/expr/fexpr_cumsumprod.cc b/src/core/expr/fexpr_cumsumprod.cc index 75d8a084d0..179fccd7c2 100644 --- a/src/core/expr/fexpr_cumsumprod.cc +++ b/src/core/expr/fexpr_cumsumprod.cc @@ -32,7 +32,7 @@ namespace dt { namespace expr { -template +template class FExpr_CumSumProd : public FExpr_Func { private: ptrExpr arg_; @@ -45,6 +45,8 @@ class FExpr_CumSumProd : public FExpr_Func { std::string out = SUM? 
"cumsum" : "cumprod"; out += '('; out += arg_->repr(); + out += ", reverse="; + out += REVERSE? "True" : "False"; out += ')'; return out; } @@ -97,7 +99,7 @@ class FExpr_CumSumProd : public FExpr_Func { Column make(Column &&col, SType stype, const Groupby &gby) const { col.cast_inplace(stype); return Column(new Latent_ColumnImpl( - new CumSumProd_ColumnImpl(std::move(col), gby) + new CumSumProd_ColumnImpl(std::move(col), gby) )); } }; @@ -105,26 +107,40 @@ class FExpr_CumSumProd : public FExpr_Func { static py::oobj pyfn_cumsum(const py::XArgs &args) { auto cumsum = args[0].to_oobj(); - return PyFExpr::make(new FExpr_CumSumProd(as_fexpr(cumsum))); + bool reverse = args[1].to(false); + if (reverse) { + return PyFExpr::make(new FExpr_CumSumProd(as_fexpr(cumsum))); + } else { + return PyFExpr::make(new FExpr_CumSumProd(as_fexpr(cumsum))); + } } + static py::oobj pyfn_cumprod(const py::XArgs &args) { auto cumprod = args[0].to_oobj(); - return PyFExpr::make(new FExpr_CumSumProd(as_fexpr(cumprod))); + bool reverse = args[1].to(false); + if (reverse) { + return PyFExpr::make(new FExpr_CumSumProd(as_fexpr(cumprod))); + } else { + return PyFExpr::make(new FExpr_CumSumProd(as_fexpr(cumprod))); + } } + DECLARE_PYFN(&pyfn_cumsum) ->name("cumsum") ->docs(doc_dt_cumsum) - ->arg_names({"cols"}) + ->arg_names({"cols", "reverse"}) ->n_positional_args(1) + ->n_positional_or_keyword_args(1) ->n_required_args(1); DECLARE_PYFN(&pyfn_cumprod) ->name("cumprod") ->docs(doc_dt_cumprod) - ->arg_names({"cols"}) + ->arg_names({"cols", "reverse"}) ->n_positional_args(1) + ->n_positional_or_keyword_args(1) ->n_required_args(1); diff --git a/tests/dt/test-cumminmax.py b/tests/dt/test-cumminmax.py index 43ce1b84f1..693694d5a0 100644 --- a/tests/dt/test-cumminmax.py +++ b/tests/dt/test-cumminmax.py @@ -50,7 +50,7 @@ def test_cumminmax_non_numeric_by(mm): @pytest.mark.parametrize("mm", [cummin, cummax]) def test_cumminmax_no_argument(mm): msg = (f"Function datatable.{mm.__name__}" - "\\(\\) 
requires exactly 1 positional argument, but none were given") + "\\(\\) requires at least 1 positional argument, but none were given") with pytest.raises(TypeError, match = msg): mm() @@ -61,11 +61,13 @@ def test_cumminmax_no_argument(mm): @pytest.mark.parametrize("mm", [cummin, cummax]) def test_cumminmax_str(mm): - assert str(mm(f.A)) == "FExpr<" + mm.__name__ + "(f.A)>" - assert str(mm(f.A) + 1) == "FExpr<" + mm.__name__ + "(f.A) + 1>" - assert str(mm(f.A + f.B)) == "FExpr<" + mm.__name__ + "(f.A + f.B)>" - assert str(mm(f.B)) == "FExpr<" + mm.__name__ + "(f.B)>" - assert str(mm(f[:2])) == "FExpr<"+ mm.__name__ + "(f[:2])>" + assert str(mm(f.A)) == "FExpr<" + mm.__name__ + "(f.A, reverse=False)>" + assert str(mm(f.A) + 1) == "FExpr<" + mm.__name__ + "(f.A, reverse=False) + 1>" + assert str(mm(f.A + f.B)) == "FExpr<" + mm.__name__ + "(f.A + f.B, reverse=False)>" + assert str(mm(f.B)) == "FExpr<" + mm.__name__ + "(f.B, reverse=False)>" + assert str(mm(f[:2])) == "FExpr<"+ mm.__name__ + "(f[:2], reverse=False)>" + assert str(mm(f[:2], True)) == "FExpr<"+ mm.__name__ + "(f[:2], reverse=True)>" + assert str(mm(f[:2], reverse=True)) == "FExpr<"+ mm.__name__ + "(f[:2], reverse=True)>" @pytest.mark.parametrize("mm", [cummin, cummax]) @@ -180,3 +182,22 @@ def test_cumminmax_groupby_complex(): assert_equals(DT_mm, DT_ref) +def test_cumminmax_reverse(): + from datetime import date as d + src = [None, d(1997, 9, 1), d(2002, 7, 31), None, d(2000, 2, 20)] + DT = dt.Frame(src) + DT_mm = DT[:, [cummin(f.C0, reverse=True), cummax(f.C0, True)]] + DT_ref = DT[::-1, :][:, [cummin(f.C0), cummax(f.C0)]][::-1, :] + assert_equals(DT_mm, DT_ref) + + +def test_cumminmax_groupby_reverse(): + DT = dt.Frame([[3, 14, 15, 92, 6], ["a", "cat", "a", "dog", "cat"]]) + DT_mm = DT[:, [cummin(f[0], reverse=True), cummax(f[0], True)], by(f[1])] + + DT_ref = dt.Frame( + {"C1" : ["a", "a", "cat", "cat", "dog"], + "C0" : [3, 15, 6, 6, 92], + "C2" : [15, 15, 14, 6, 92]} + ) + assert_equals(DT_mm, 
DT_ref) diff --git a/tests/dt/test-cumprod.py b/tests/dt/test-cumprod.py index 0ec595f204..7814bf6d30 100644 --- a/tests/dt/test-cumprod.py +++ b/tests/dt/test-cumprod.py @@ -42,7 +42,7 @@ def test_cumprod_non_numeric_by(): DT[:, cumprod(f[0]), by(f[0])] def test_cumprod_no_argument(): - match = r'Function datatable.cumprod\(\) requires exactly 1 positional argument, ' \ + match = r'Function datatable.cumprod\(\) requires at least 1 positional argument, ' \ 'but none were given' with pytest.raises(TypeError, match = match): dt.cumprod() @@ -53,11 +53,14 @@ def test_cumprod_no_argument(): #------------------------------------------------------------------------------- def test_cumprod_str(): - assert str(cumprod(f.A)) == "FExpr" - assert str(cumprod(f.A) + 1) == "FExpr" - assert str(cumprod(f.A + f.B)) == "FExpr" - assert str(cumprod(f.B)) == "FExpr" - assert str(cumprod(f[:2])) == "FExpr" + assert str(cumprod(f.A)) == "FExpr" + assert str(cumprod(f.A, True)) == "FExpr" + assert str(cumprod(f.A) + 1) == "FExpr" + assert str(cumprod(f.A + f.B)) == "FExpr" + assert str(cumprod(f.A + f.B, reverse=True)) == "FExpr" + assert str(cumprod(f.B)) == "FExpr" + assert str(cumprod(f[:2])) == "FExpr" + assert str(cumprod(f[:2], reverse=True)) == "FExpr" def test_cumprod_empty_frame(): @@ -87,6 +90,12 @@ def test_cumprod_small(): DT_ref = dt.Frame([[0, 0, 0, 0, 0]/dt.int64, [-1, -1, -1, -2, -11]/dt.float64]) assert_equals(DT_cumprod, DT_ref) +def test_cumprod_reverse(): + DT = dt.Frame([range(5), [-1, 1, None, 2, 5.5]]) + DT_cumprod = DT[:, cumprod(f[:], reverse=True)] + DT_ref = DT[::-1, cumprod(f[:])][::-1, :] + assert_equals(DT_cumprod, DT_ref) + def test_cumprod_groupby(): DT = dt.Frame([[2, 1, 1, 1, 2], [1.5, -1.5, math.inf, 2, 3]]) @@ -95,6 +104,13 @@ def test_cumprod_groupby(): assert_equals(DT_cumprod, DT_ref) +def test_cumprod_groupby_reverse(): + DT = dt.Frame([[2, 1, 1, 1, 2], [1.5, -1.5, math.inf, 2, 3]]) + DT_cumprod = DT[:, cumprod(f[:], reverse=True), by(f[0])] + 
DT_ref = dt.Frame([[1, 1, 1, 2, 2], [-math.inf, math.inf, 2.0, 4.5, 3.0]/dt.float64]) + assert_equals(DT_cumprod, DT_ref) + + def test_cumprod_void_grouped_column(): DT = dt.Frame([None]*10) DT_cumprod = DT[:, cumprod(f.C0), by(f.C0)] diff --git a/tests/dt/test-cumsum.py b/tests/dt/test-cumsum.py index 02b43c36f5..b328cb3fa6 100644 --- a/tests/dt/test-cumsum.py +++ b/tests/dt/test-cumsum.py @@ -42,7 +42,7 @@ def test_cumsum_non_numeric_by(): DT[:, cumsum(f[0]), by(f[0])] def test_cumsum_no_argument(): - match = r'Function datatable.cumsum\(\) requires exactly 1 positional argument, ' \ + match = r'Function datatable.cumsum\(\) requires at least 1 positional argument, ' \ 'but none were given' with pytest.raises(TypeError, match = match): dt.cumsum() @@ -53,11 +53,13 @@ def test_cumsum_no_argument(): #------------------------------------------------------------------------------- def test_cumsum_str(): - assert str(cumsum(f.A)) == "FExpr" - assert str(cumsum(f.A) + 1) == "FExpr" - assert str(cumsum(f.A + f.B)) == "FExpr" - assert str(cumsum(f.B)) == "FExpr" - assert str(cumsum(f[:2])) == "FExpr" + assert str(cumsum(f.A)) == "FExpr" + assert str(cumsum(f.A, True)) == "FExpr" + assert str(cumsum(f.A) + 1) == "FExpr" + assert str(cumsum(f.A + f.B)) == "FExpr" + assert str(cumsum(f.B)) == "FExpr" + assert str(cumsum(f[:2])) == "FExpr" + assert str(cumsum(f[:2], reverse=True)) == "FExpr" def test_cumsum_empty_frame(): @@ -88,6 +90,13 @@ def test_cumsum_small(): assert_equals(DT_cumsum, DT_ref) +def test_cumsum_reverse(): + DT = dt.Frame([range(5), [-1, 1, None, 2, 5.5]]) + DT_cumsum = DT[:, cumsum(f[:], reverse=True)] + DT_ref = DT[::-1, cumsum(f[:])][::-1, :] + assert_equals(DT_cumsum, DT_ref) + + def test_cumsum_groupby(): DT = dt.Frame([[2, 1, 1, 1, 2], [1.5, -1.5, math.inf, 2, 3]]) DT_cumsum = DT[:, cumsum(f[:]), by(f[0])] @@ -95,6 +104,13 @@ def test_cumsum_groupby(): assert_equals(DT_cumsum, DT_ref) +def test_cumsum_groupby_reverse(): + DT = dt.Frame([[2, 1, 1, 1, 
2], [1.5, -1.5, math.inf, 2, 3]]) + DT_cumsum = DT[:, cumsum(f[:], reverse=True), by(f[0])] + DT_ref = dt.Frame([[1, 1, 1, 2, 2], [math.inf, math.inf, 2.0, 4.5, 3.0]/dt.float64]) + assert_equals(DT_cumsum, DT_ref) + + def test_cumsum_void_grouped_column(): DT = dt.Frame([None]*10) DT_cumsum = DT[:, cumsum(f.C0), by(f.C0)] diff --git a/tests/test-f.py b/tests/test-f.py index 2f11c572cb..4216d0a475 100644 --- a/tests/test-f.py +++ b/tests/test-f.py @@ -441,29 +441,38 @@ def test_countna(): def test_cumsum(): assert str(dt.cumsum(f.A)) == str(f.A.cumsum()) assert str(dt.cumsum(f[:])) == str(f[:].cumsum()) + assert str(dt.cumsum(f[:], True)) == str(f[:].cumsum(True)) DT = dt.Frame(A = [9, 8, 2, 3, None, None, 3, 0, 5, 5, 8, None, 1]) assert_equals(DT[:, f.A.cumsum()], DT[:, dt.cumsum(f.A)]) + assert_equals(DT[:, f.A.cumsum(reverse=True)], DT[:, dt.cumsum(f.A, reverse=True)]) + def test_cummax(): assert str(dt.cummax(f.A)) == str(f.A.cummax()) + assert str(dt.cummax(f.A, reverse=True)) == str(f.A.cummax(True)) assert str(dt.cummax(f[:])) == str(f[:].cummax()) DT = dt.Frame(A = [9, 8, 2, 3, None, None, 3, 0, 5, 5, 8, None, 1]) assert_equals(DT[:, f.A.cummax()], DT[:, dt.cummax(f.A)]) + assert_equals(DT[:, f.A.cummax(reverse=True)], DT[:, dt.cummax(f.A, reverse=True)]) def test_cummin(): assert str(dt.cummin(f.A)) == str(f.A.cummin()) + assert str(dt.cummin(f.A, reverse=True)) == str(f.A.cummin(True)) assert str(dt.cummin(f[:])) == str(f[:].cummin()) DT = dt.Frame(A = [9, 8, 2, 3, None, None, 3, 0, 5, 5, 8, None, 1]) assert_equals(DT[:, f.A.cummin()], DT[:, dt.cummin(f.A)]) + assert_equals(DT[:, f.A.cummin(True)], DT[:, dt.cummin(f.A, True)]) def test_cumprod(): assert str(dt.cumprod(f.A)) == str(f.A.cumprod()) assert str(dt.cumprod(f[:])) == str(f[:].cumprod()) + assert str(dt.cumprod(f[:], reverse=True)) == str(f[:].cumprod(reverse=True)) DT = dt.Frame(A = [9, 8, 2, 3, None, None, 3, 0, 5, 5, 8, None, 1]) assert_equals(DT[:, f.A.cumprod()], DT[:, dt.cumprod(f.A)]) + 
assert_equals(DT[:, f.A.cumprod(True)], DT[:, dt.cumprod(f.A, True)]) def test_fillna():