From 8ad809cd81c0d0ad9d3f798daf92a04488b8c546 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Thu, 28 Apr 2022 10:53:13 +0200
Subject: [PATCH 1/3] fix handling of variable_eltype in stack

---
 src/abstractdataframe/reshape.jl |  4 ++--
 test/reshape.jl                  | 14 ++++++++++++++
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/src/abstractdataframe/reshape.jl b/src/abstractdataframe/reshape.jl
index a864930808..6013259c87 100644
--- a/src/abstractdataframe/reshape.jl
+++ b/src/abstractdataframe/reshape.jl
@@ -164,8 +164,8 @@ function stack(df::AbstractDataFrame,
         # (note that copyto! inserts levels in their order of appearance)
         nms = names(df, ints_measure_vars)
         simnms = similar(nms, variable_eltype)
-        catnms = simnms isa Vector ? PooledArray(catnms) : simnms
-        copyto!(catnms, nms)
+        copyto!(simnms, nms)
+        catnms = simnms isa Vector ? PooledArray(simnms) : simnms
     end
     return DataFrame(AbstractVector[[repeat(df[!, c], outer=N) for c in ints_id_vars]..., # id_var columns
                                     repeat(catnms, inner=nrow(df)),                       # variable
diff --git a/test/reshape.jl b/test/reshape.jl
index f3d7c01158..fdd97f77a3 100644
--- a/test/reshape.jl
+++ b/test/reshape.jl
@@ -845,4 +845,18 @@ end
     end
 end
 
+@testset "variable_eltype in stack tests" begin
+    df = DataFrame(A = 1:3, B = [2.0, -1.1, 2.8], C = ["p","q","r"])
+    @test_throws MethodError stack(df, :C, variable_name=:D, variable_eltype=Int)
+    for T in (AbstractString, Any)
+        sdf = stack(df, [:A, :B], variable_name=:D, variable_eltype=T)
+        @test sdf == DataFrame(C=["p", "q", "r", "p", "q", "r"],
+                            D=["A", "A", "A", "B", "B", "B"],
+                            value=[1.0, 2.0, 3.0, 2.0, -1.1, 2.8])
+        @test sdf.C isa Vector{String}
+        @test sdf.value isa Vector{Float64}
+        @test sdf.D isa PooledVector{T}
+    end
+end
+
 end # module

From e475cd33b6e953c0aa095019d4595f64d8577ad9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Fri, 29 Apr 2022 19:59:44 +0200
Subject: [PATCH 2/3] Apply suggestions from code review (test formatting)

Co-authored-by: Milan Bouchet-Valat <nalimilan@club.fr>
---
 test/reshape.jl | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/test/reshape.jl b/test/reshape.jl
index fdd97f77a3..1f867642d4 100644
--- a/test/reshape.jl
+++ b/test/reshape.jl
@@ -846,13 +846,13 @@ end
 end
 
 @testset "variable_eltype in stack tests" begin
-    df = DataFrame(A = 1:3, B = [2.0, -1.1, 2.8], C = ["p","q","r"])
+    df = DataFrame(A=1:3, B=[2.0, -1.1, 2.8], C=["p", "q", "r"])
     @test_throws MethodError stack(df, :C, variable_name=:D, variable_eltype=Int)
     for T in (AbstractString, Any)
         sdf = stack(df, [:A, :B], variable_name=:D, variable_eltype=T)
         @test sdf == DataFrame(C=["p", "q", "r", "p", "q", "r"],
-                            D=["A", "A", "A", "B", "B", "B"],
-                            value=[1.0, 2.0, 3.0, 2.0, -1.1, 2.8])
+                               D=["A", "A", "A", "B", "B", "B"],
+                               value=[1.0, 2.0, 3.0, 2.0, -1.1, 2.8])
         @test sdf.C isa Vector{String}
         @test sdf.value isa Vector{Float64}
         @test sdf.D isa PooledVector{T}

From ae8c7634a989311f40e3d82a210494458dd7194f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bogumi=C5=82=20Kami=C5=84ski?= <bkamins@sgh.waw.pl>
Date: Sat, 30 Apr 2022 09:42:39 +0200
Subject: [PATCH 3/3] minor performance improvement in stack; bump PooledArrays compat to 1.4.2

---
 Project.toml                     | 2 +-
 src/abstractdataframe/reshape.jl | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Project.toml b/Project.toml
index ebc02f7ce2..44a8c58a5e 100644
--- a/Project.toml
+++ b/Project.toml
@@ -30,7 +30,7 @@ DataAPI = "1.10"
 InvertedIndices = "1"
 IteratorInterfaceExtensions = "0.1.1, 1"
 Missings = "0.4.2, 1"
-PooledArrays = "1.3.0"
+PooledArrays = "1.4.2"
 PrettyTables = "0.12, 1"
 Reexport = "0.1, 0.2, 1"
 ShiftedArrays = "1"
diff --git a/src/abstractdataframe/reshape.jl b/src/abstractdataframe/reshape.jl
index 6013259c87..f0cfa63aa3 100644
--- a/src/abstractdataframe/reshape.jl
+++ b/src/abstractdataframe/reshape.jl
@@ -164,8 +164,8 @@ function stack(df::AbstractDataFrame,
         # (note that copyto! inserts levels in their order of appearance)
         nms = names(df, ints_measure_vars)
         simnms = similar(nms, variable_eltype)
-        copyto!(simnms, nms)
         catnms = simnms isa Vector ? PooledArray(simnms) : simnms
+        copyto!(catnms, nms)
     end
     return DataFrame(AbstractVector[[repeat(df[!, c], outer=N) for c in ints_id_vars]..., # id_var columns
                                     repeat(catnms, inner=nrow(df)),                       # variable