Skip to content

Commit

Permalink
Merge pull request #86 from thibautjombart/SNPbin-fix
Browse files Browse the repository at this point in the history
SNPbin fix
  • Loading branch information
thibautjombart committed Aug 18, 2015
2 parents dff44a6 + 0ec7ff4 commit da6043a
Show file tree
Hide file tree
Showing 3 changed files with 135 additions and 25 deletions.
6 changes: 4 additions & 2 deletions ChangeLog
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,10 @@ BUG FIXES
instead, computations are done using all available loci

o adegenetTutorial now opens up-to-date tutorials



o subsetting genlight objects now treats missing data appropriately
when given logical, character, or negative subscripts.
(See issue #83 (https://github.com/thibautjombart/adegenet/issues/83))

CHANGES IN ADEGENET VERSION 2.0.0
CHANGES IN GENIND/GENPOP/GENLIGHT OBJECTS
Expand Down
96 changes: 74 additions & 22 deletions R/glHandle.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,32 +17,81 @@
return(x)
}

# Zhian N. Kamvar
# Mon Aug 17 09:39:12 2015 ------------------------------
#
# This function takes two steps:
# 1. Subset the missing positions
# 2. Subset the vectors of raw SNPs
#
# Both steps are not exactly straighforward. Because the missing vector only
# represents the positions of missing data, it must be subset by value as
# opposed to position.
.SNPbinset <- function(x, i){
if (missing(i)) i <- TRUE
n.loc <- x@n.loc
if (length(x@NA.posi) > 0){
namatches <- match(i, x@NA.posi, nomatch = 0)
if (missing(i)) i <- TRUE

# Create a logical value indicating whether or not subsetting is necessary.
we_take_all <- length(i) == 1 && is.logical(i) && i
n.loc <- x@n.loc
if (length(x@NA.posi) > 0){
if (is.logical(i)){
if (we_take_all){
# Keep all of the data
return(x)
} else {
# If the positons are logical, perhaps the best way to address this is
# to match the TRUE positions to the NA.posi vector. Adding nomatch = 0
# avoids introducing NAs.
namatches <- match(which(i), x@NA.posi, nomatch = 0)
nas.kept <- x@NA.posi[namatches]
if (length(nas.kept) > 0){
old.posi <- 1:n.loc
x@NA.posi <- match(nas.kept, old.posi[i])
} else {
x@NA.posi <- nas.kept
}
}
} else if (is.character(i)){
stop("Cannot subset a SNPbin object with a character vector", call. = FALSE)
} else if (all(i < 0)){
# For negative subscripts, find which ones they match and then
# negate those. Luckily -0 is allowed.
namatches <- match(abs(i), x@NA.posi, nomatch = 0)
# Unfortunately, if nothing matches, then the default are zeroes. When you
# subset a vector in R with only zero, you will get an empty vector. This
# conditional makes sure that NA positions are retained.
if (all(namatches == 0)){
nas.kept <- x@NA.posi
} else {
nas.kept <- x@NA.posi[-namatches]
}

} else if (all(i > 0)){
# Positive subscripts are much easier. First you find where the subscripts
# match and then your subset with those positions.
namatches <- match(i, x@NA.posi, nomatch = 0)
nas.kept <- x@NA.posi[namatches]
} else {
stop("Cannot subset a SNPbin with mixed subscripts.", call. = FALSE)
}
if (length(i) == 1 && is.logical(i) && i){
return(x)
} else if (all(is.logical(i))){
n.loc <- sum(i)
} else if (any(i < 0)){
n.loc <- n.loc - length(i)
# After we find out which missing positions we need to keep, we reset the
# missing positions to the subset data.
if (length(nas.kept) > 0){
old.posi <- 1:n.loc
x@NA.posi <- match(nas.kept, old.posi[i])
} else {
n.loc <- length(i)
x@NA.posi <- nas.kept
}
x@snp <- lapply(x@snp, .subsetbin, i)
x@n.loc <- n.loc

}
# Here we calculate the number of loci we will have left in the data.
if (we_take_all){
return(x)
} else if (all(is.logical(i))){
n.loc <- sum(i)
} else if (any(i < 0)){
n.loc <- n.loc - length(i)
} else {
n.loc <- length(i)
}
# Now we loop over all chromosomes and subset.
x@snp <- lapply(x@snp, .subsetbin, i)
# Set the new value of the number of loci and return.
x@n.loc <- n.loc
return(x)
}

###############
Expand Down Expand Up @@ -123,12 +172,15 @@ setMethod("[", signature(x="genlight", i="ANY", j="ANY", drop="ANY"), function(x
## SUBSET LOCI ##

## handle ind.names, loc.names, chromosome, position, and alleles
if (is.character(j)){
j <- match(j, x@loc.names, nomatch = 0)
}
x@loc.names <- x@loc.names[j]
x@chromosome <- chr(x)[j]
x@position <- position(x)[j]
x@loc.all <- alleles(x)[j]
x@gen <- lapply(x@gen, function(e) e[j])
x@n.loc <- x@gen[[1]]@n.loc
x@gen <- lapply(x@gen, function(e) e[j])
x@n.loc <- x@gen[[1]]@n.loc

return(x)
}) # end [] for genlight
Expand Down
58 changes: 57 additions & 1 deletion tests/testthat/test_genlight.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,60 @@ test_that("population accessors work", {
expect_that(popNames(x), equals(c("replacement", "pop2")))
expect_error(popNames(x) <- NULL)
expect_error(popNames(x)[2] <- NA)
})
})

x <- "
X13049 X13050 X13051 X13052 X13053
AA36881 2 NA 2 2 2
AA36883 2 2 2 2 2
AA36884 2 2 2 2 2
AA36802 NA 2 2 2 2
AA36803 2 2 2 2 2
AA36804 2 NA 2 2 2
AA36181 2 NA 2 2 2
AA36183 2 2 2 2 2"

xxdf <- read.table(text = x)
xx <- new("genlight", xxdf, parallel = FALSE)
pop(xx) <- rep(LETTERS[1:2], each = 4)

test_that("missing data is properly subset with logical subscripts", {
skip_on_cran()
Apop <- pop(xx) == "A"
Bpop <- pop(xx) == "B"
expect_identical(NA.posi(xx), NA.posi(xx[]))
expect_identical(xxdf[Apop, ], as.data.frame(xx[Apop, ]))
expect_identical(xxdf[Bpop, ], as.data.frame(xx[Bpop, ]))
keepers <- c(FALSE, rep(TRUE, 4))
expect_identical(xxdf[keepers], as.data.frame(xx[, keepers]))
})

test_that("missing data is properly subset with positive subscripts", {
skip_on_cran()
rl <- sample(5)
# Can subset single locus
expect_identical(xxdf[, 1, drop = FALSE], as.data.frame(xx[, 1]))
# Can subset range of loci
expect_identical(xxdf[, 1:3, drop = FALSE], as.data.frame(xx[, 1:3]))
# Can subset by position
expect_identical(xxdf[, rl, drop = FALSE], as.data.frame(xx[, rl]))
})

test_that("missing data is properly subset with negative subscripts", {
skip_on_cran()
expect_identical(xxdf[, -1], as.data.frame(xx[, -1]))
expect_identical(xxdf[, -c(1, 3)], as.data.frame(xx[, -c(1, 3)]))
})

test_that("missing data is properly subset with a character vector", {
skip_on_cran()
lnames <- locNames(xx)
rl <- sample(lnames)
expect_identical(xxdf[, rl], as.data.frame(xx[, rl]))
expect_identical(xxdf[, lnames[1:2]], as.data.frame(xx[, lnames[1:2]]))
})

test_that("genlight objects do not take a mixture of positive and negative subscripts", {
skip_on_cran()
expect_error(xx[, c(2, -1)], "subscripts.")
})

0 comments on commit da6043a

Please sign in to comment.