From a13db50a0cd8115691fe92e67b1301422c664401 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= Date: Mon, 12 Oct 2020 16:47:44 +0200 Subject: [PATCH] fix bug in select/transform when GroupedDataFrame is reordered --- Project.toml | 2 +- src/groupeddataframe/splitapplycombine.jl | 9 ++++++-- test/grouping.jl | 26 +++++++++++++++++++++++ 3 files changed, 34 insertions(+), 3 deletions(-) diff --git a/Project.toml b/Project.toml index 1fc4b4fa60..ea9deb978e 100644 --- a/Project.toml +++ b/Project.toml @@ -1,6 +1,6 @@ name = "DataFrames" uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" -version = "0.21.7" +version = "0.21.8" [deps] CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597" diff --git a/src/groupeddataframe/splitapplycombine.jl b/src/groupeddataframe/splitapplycombine.jl index 5c9d8f5129..0ce9c219d3 100644 --- a/src/groupeddataframe/splitapplycombine.jl +++ b/src/groupeddataframe/splitapplycombine.jl @@ -1229,9 +1229,14 @@ function _combine(f::AbstractVector{<:Pair}, newcol = similar(col) # we can probably make it more efficient, but I leave it as an optimization for the future gd_idx = gd.idx - for j in eachindex(gd.idx, col) - newcol[gd_idx[j]] = col[j] + k = 0 + for (s, e) in zip(gd.starts, gd.ends) + for j in s:e + k += 1 + newcol[gd_idx[j]] = col[k] + end end + @assert k == length(gd_idx) res[i] = (col_idx, newcol) end end diff --git a/test/grouping.jl b/test/grouping.jl index f0b1d1773c..14d34e03ac 100644 --- a/test/grouping.jl +++ b/test/grouping.jl @@ -2483,4 +2483,30 @@ end @test eltype(df2.a) === eltype(df2.b) === Union{UInt, Missing} end +@testset "aggregation of reordered groups" begin + df = DataFrame(id = [1, 2, 3, 1, 3, 2], x=1:6) + gdf = groupby(df, :id) + @test select(df, :id, :x => x -> 2x) == select(gdf, :x => x -> 2x) + @test transform(df, :x => x -> 2x) == transform(gdf, :x => x -> 2x) + @test combine(gdf, :x => x -> 2x) == + DataFrame(id=[1, 1, 2, 2, 3, 3], x_function=[2, 8, 4, 12, 6, 10]) + gdf = groupby(df, :id)[[3, 1, 2]] + @test select(df, :id, :x => x -> 2x) == select(gdf, :x => x -> 2x) + @test transform(df, :x => x -> 2x) == transform(gdf, :x => x -> 2x) + @test combine(gdf, :x => x -> 2x) == + DataFrame(id=[3, 3, 1, 1, 2, 2], x_function=[6, 10, 2, 8, 4, 12]) + + df = DataFrame(id = [3, 2, 1, 3, 1, 2], x=1:6) + gdf = groupby(df, :id, sort=true) + @test select(df, :id, :x => x -> 2x) == select(gdf, :x => x -> 2x) + @test transform(df, :x => x -> 2x) == transform(gdf, :x => x -> 2x) + @test combine(gdf, :x => x -> 2x) == + DataFrame(id=[1, 1, 2, 2, 3, 3], x_function=[6, 10, 4, 12, 2, 8]) + gdf = groupby(df, :id)[[3, 1, 2]] + @test select(df, :id, :x => x -> 2x) == select(gdf, :x => x -> 2x) + @test transform(df, :x => x -> 2x) == transform(gdf, :x => x -> 2x) + @test combine(gdf, :x => x -> 2x) == + DataFrame(id=[1, 1, 3, 3, 2, 2], x_function=[6, 10, 2, 8, 4, 12]) +end + end # module