From b5cd84ae7cc3c5825311450ab7200d7a74dc6f73 Mon Sep 17 00:00:00 2001 From: Alexey Stukalov Date: Sun, 21 Mar 2021 14:37:10 +0100 Subject: [PATCH] add generic vector conversion jlvec() method so that it is called to convert dataframe column with non-scalar eltype. Also restrict the first jlvec() method to numeric eltypes. fixes #82 --- src/convert.jl | 11 ++++++++--- test/RDA.jl | 15 +++++++++++++++ test/data_v2/list_of_vec.rda | Bin 0 -> 228 bytes test/data_v3/list_of_vec.rda | Bin 0 -> 238 bytes test/generate_rda.R | 8 ++++++++ 5 files changed, 31 insertions(+), 3 deletions(-) create mode 100644 test/data_v2/list_of_vec.rda create mode 100644 test/data_v3/list_of_vec.rda diff --git a/src/convert.jl b/src/convert.jl index 276ba9b..0d6167e 100644 --- a/src/convert.jl +++ b/src/convert.jl @@ -25,7 +25,7 @@ isna(x::ComplexF64) = isna(real(x)) || isna(imag(x)) # convert R vector into Vector holding elements of type T # if force_missing is true, the result is always Vector{Union{T,Missing}}, # otherwise it's Vector{T} if `rv` doesn't contain NAs -function jlvec(::Type{T}, rv::RVEC, force_missing::Bool=true) where T +function jlvec(::Type{T}, rv::RVEC, force_missing::Bool=true) where T <: Number anyna = any(isna, rv.data) if force_missing || anyna res = convert(Vector{Union{T,Missing}}, rv.data) @@ -154,6 +154,11 @@ function jlvec(::Type{ZonedDateTime}, rv::RVEC, force_missing::Bool=true) return datetimes end +# generic vector conversion +function jlvec(::Type{T}, rv::RVEC, force_missing::Bool=true) where T + return sexp2julia.(rv.data) +end + function sexp2julia(rex::RSEXPREC) @warn "Conversion of $(typeof(rex)) to Julia is not implemented" return nothing @@ -188,10 +193,10 @@ function sexp2julia(rl::RList) DataFrame(Any[isa(col, RAltRep) ? sexp2julia(col) : jlvec(col, false) for col in rl.data], identifier.(names(rl)), makeunique=true) elseif hasnames(rl) - DictoVec(Any[sexp2julia(item) for item in rl.data], names(rl)) + DictoVec(jlvec(Any, rl), names(rl)) else # FIXME return DictoVec if forceDictoVec is on - map(sexp2julia, rl.data) + jlvec(Any, rl) end end diff --git a/test/RDA.jl b/test/RDA.jl index 9f63cf7..1f1fe7f 100644 --- a/test/RDA.jl +++ b/test/RDA.jl @@ -98,6 +98,21 @@ using RData @test f["x"] == f["y"] == ["a", "b", "c"] end + @testset "List of vectors (#82)" begin + f = load(joinpath(rdata_path, "list_of_vec.rda")) + @test f["listofvec"] isa Vector + @test length(f["listofvec"]) == 3 + @test isequal(f["listofvec"], [[1., 2., missing], [3., 4.], [5., 6., missing]]) + + @test f["namedlistofvec"] isa DictoVec + @test length(f["namedlistofvec"]) == 3 + @test f["namedlistofvec"].name2index == Dict("A"=>1, "B"=>3) + @test isequal(values(f["namedlistofvec"]), [[1., 2., missing], [3., 4.], [5., 6., missing]]) + + @test f["testdf"] isa DataFrame + @test nrow(f["testdf"]) == 3 + @test isequal(f["testdf"][!, "listascol"], [[1., 2., missing], [3., 4.], [5., 6., missing, 7.]]) + end end # for ver in ... @testset "Loading AltRep-containing RData files (version=3)" begin diff --git a/test/data_v2/list_of_vec.rda b/test/data_v2/list_of_vec.rda new file mode 100644 index 0000000000000000000000000000000000000000..de72cc20b3f6295f35309469a0844b687ab27451 GIT binary patch literal 228 zcmVS eu)zemQW8rN_0o!f`XRx^@DBiZnPDGz0ssK6<63zD literal 0 HcmV?d00001 diff --git a/test/data_v3/list_of_vec.rda b/test/data_v3/list_of_vec.rda new file mode 100644 index 0000000000000000000000000000000000000000..77b85fe8b3816bd00adb8a49eb44bb3bdcb18293 GIT binary patch literal 238 zcmVz4Rfi*P5P1k~ffrSakVFU_u z0%^{i%;J*#w6fG>AV(OigMon$MB9I000RdI#ZV9CvoC_mGdXZT-p8@|%MUOc}_b%>SWIVnTNkIIa_mlk;=X6G2gaxgIP;ctKhlfW-g* o|No)~TXIffF)Y|%f?O$yC5d`zML_+K;9~d(0KdNAae)E=0Ii;42mk;8 literal 0 HcmV?d00001 diff --git a/test/generate_rda.R b/test/generate_rda.R index 54b50f6..ceb25cb 100644 --- a/test/generate_rda.R +++ b/test/generate_rda.R @@ -124,6 +124,14 @@ altrepnames_df <- as.data.frame(altrepnames_list) names(altrepnames_df) <- names(altrepnames_list) save(altrepnames_list, altrepnames_df, file=file.path("data_v3", "altrep_names.rda"), version=3) +# list of vectors variable and list of vectors column (for #82) +listofvec <- list(c(1, 2, NA), c(3, 4), c(5, 6, NA)) +namedlistofvec <- list(A=c(1, 2, NA), c(3, 4), B=c(5, 6, NA)) +testdf <- data.frame(a = c("a", "b", "c")) +testdf$listascol <- list(c(1, 2, NA), c(3, 4), c(5, 6, NA, 7)) +save(listofvec, namedlistofvec, testdf, file=file.path("data_v2", "list_of_vec.rda"), version=2) +save(listofvec, namedlistofvec, testdf, file=file.path("data_v3", "list_of_vec.rda"), version=3) + # generate files using each of the supported compression types df <- data.frame(num = c(1.1, 2.2)) rdata_path <- "data_v3"