From d9ecef26c6120ed6b7e4dd5cc036e7d79acc6231 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Sat, 27 Feb 2021 14:56:25 -0500 Subject: [PATCH 01/20] Add ReadMtx() to read local and remote mtx files --- NAMESPACE | 3 + R/preprocessing.R | 152 +++++++++++++++++++++++++++++++++++++++++++++ man/FindMarkers.Rd | 1 + man/ReadMtx.Rd | 55 ++++++++++++++++ 4 files changed, 211 insertions(+) create mode 100644 man/ReadMtx.Rd diff --git a/NAMESPACE b/NAMESPACE index fdcfe8ea4..1816b81b6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -262,6 +262,7 @@ export(Radius) export(Read10X) export(Read10X_Image) export(Read10X_h5) +export(ReadMtx) export(ReadSlideSeq) export(Reductions) export(RegroupIdents) @@ -556,7 +557,9 @@ importFrom(grid,unit) importFrom(grid,viewport) importFrom(httr,GET) importFrom(httr,accept_json) +importFrom(httr,build_url) importFrom(httr,content) +importFrom(httr,parse_url) importFrom(httr,status_code) importFrom(httr,timeout) importFrom(ica,icafast) diff --git a/R/preprocessing.R b/R/preprocessing.R index aa8f5c5aa..5d69b6ed9 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -731,6 +731,158 @@ MULTIseqDemux <- function( return(object) } + +#' Load in data from remote or local mtx files +#' +#' Enables easy loading of sparse data matrices +#' +#' @param mtx Name or remote URL of the mtx file +#' @param cells Name or remote URL of the cells/barcodes file +#' @param features Name or remote URL of the features/genes file +#' @param gene.column Specify which column of features files to use for gene names; default is 2 +#' @param cell.column Specify which column of cells file to use for cell names; default is 1 +#' @param unique.features Make feature names unique (default TRUE) +#' @param strip.suffix Remove trailing "-1" if present in all cell barcodes. +#' +#' @return A sparse matrix containing the expression data. +#' +#' @importFrom Matrix readMM +#' @importFrom utils read.delim +#' @importFrom httr build_url parse_url +#' @importFrom tools file_ext +#' +#' +#' @export +#' @concept preprocessing +#' +#' @examples +#' \dontrun{ +#' # For local files: +#' +#' expression_matrix <- ReadMtx(genes="count_matrix.mtx.gz", features="features.tsv.gz", cells="barcodes.tsv.gz") +#' seurat_object <- CreateSeuratObject(counts = expression_matrix) +#' +#' # For remote files: +#' +#' expression_matrix <- ReadMtx(mtx = "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE132044&format=file&file=GSE132044%5Fmixture%5Fhg19%5Fmm10%5Fcount%5Fmatrix%2Emtx%2Egz", +#' cells ="https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE132044&format=file&file=GSE132044%5Fmixture%5Fhg19%5Fmm10%5Fcell%2Etsv%2Egz", +#' features = "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE132044&format=file&file=GSE132044%5Fmixture%5Fhg19%5Fmm10%5Fgene%2Etsv%2Egz", +#' feature.columns = 1) +#' seurat_object <- CreateSeuratObject(counts = data) +#' } +#' +ReadMtx <- function(mtx, + cells, + features, + cell.column = 1, + feature.column = 2, + unique.features = TRUE, + strip.suffix = FALSE){ + mtx <- build_url(url = parse_url(url = mtx)) + cells <- build_url(url = parse_url(url = cells)) + features <- build_url(url = parse_url(url = features)) + all_files <- list("Expression matrix" = mtx, "Barcode" = cells, "Gene name" = features) + + check_file_exists <- function(filetype, filepath){ + if (grepl(pattern = '^:///', x = filepath)) { + filepath <- gsub(pattern = ":///", replacement = "", x = filepath) + if (!file.exists(paths = filepath)) { + stop(paste(filetype, "file missing. Expecting", filepath), call. = FALSE) + } + } + } + + # check if all files exist + lapply(seq_along(all_files), function(y, n, i) { check_file_exists(n[[i]], y[[i]]) }, y=all_files, n=names(all_files)) + + # convenience fucntion to read local or remote tab delimited files + readTableUri <- function(uri){ + if (grepl(pattern = '^:///', x = uri)) { + textcontent <- read.table(file = uri, header = FALSE, sep = '\t', row.names = NULL) + } else{ + if (file_ext(uri)=="gz") { + textcontent <- read.table(file = gzcon(url(uri), text = TRUE), + header = FALSE, sep = '\t', row.names = NULL) + } else { + textcontent <- read.table(file = uri, header = FALSE, + sep = '\t', row.names = NULL) + } + } + return (textcontent) + } + + # read barcodes + cell.barcodes <- readTableUri(uri = cells) + bcols <- ncol(x = cell.barcodes) + if (bcols < cell.column) { + stop(paste0("cell.column was set to ", cell.column, + " but ", cells, " only has ", bcols, " columns.", + " Try setting the cell.column argument to a value <= to ", bcols, ".")) + } + cell.names <- cell.barcodes[, cell.column] + + if (all(grepl(pattern = "\\-1$", x = cell.names)) & strip.suffix) { + cell.names <- as.vector(x = as.character(x = sapply( + X = cell.names, + FUN = ExtractField, + field = 1, + delim = "-" + ))) + } + + # read features + feature.names <- readTableUri(uri = features) + fcols <- ncol(x = feature.names) + if (fcols < feature.column) { + stop(paste0("feature.column was set to ", feature.column, + " but ", features, " only has ", fcols, " column(s).", + " Try setting the feature.column argument to a value <= to ", fcols, ".")) + } + if (any(is.na(x = feature.names[, feature.column]))) { + na.features <- which(x = is.na(x = feature.names[, feature.column])) + replacement.column <- ifelse(test = feature.column == 2, yes = 1, no = 2) + if (replacement.column > fcols){ + stop( + paste0("Some features names are NA in column ", feature.column, + ". Try specifiying a different column."), + call. = FALSE, + immediate. = TRUE + ) + } else { + warning( + paste0("Some features names are NA in column ", feature.column, + ". Replacing NA names with ID from column ", replacement.column, "."), + call. = FALSE, + immediate. = TRUE + ) + } + feature.names[na.features, feature.column] <- feature.names[na.features, replacement.column] + } + + feature.names <- feature.names[, feature.column] + if (unique.features) { + feature.names <- make.unique(names = feature.names) + } + + # read mtx + if (grepl(pattern = '^:///', x = mtx)) { + data <- readMM(mtx) + } else { + if (file_ext(mtx) == "gz"){ + data <- readMM(gzcon(url(mtx))) + } else { + data <- readMM(mtx) + } + } + + colnames(x = data) <- cell.names + rownames(x = data) <- feature.names + + return (data) +} + + + #' Load in data from 10X #' #' Enables easy loading of sparse data matrices provided by 10X genomics. diff --git a/man/FindMarkers.Rd b/man/FindMarkers.Rd index 19b23a781..099691bb4 100644 --- a/man/FindMarkers.Rd +++ b/man/FindMarkers.Rd @@ -76,6 +76,7 @@ FindMarkers(object, ...) min.cells.group = 3, pseudocount.use = 1, mean.fxn = rowMeans, + fc.name = NULL, ... ) diff --git a/man/ReadMtx.Rd b/man/ReadMtx.Rd new file mode 100644 index 000000000..47154332b --- /dev/null +++ b/man/ReadMtx.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/preprocessing.R +\name{ReadMtx} +\alias{ReadMtx} +\title{Load in data from remote or local mtx files} +\usage{ +ReadMtx( + mtx, + cells, + features, + cell.column = 1, + feature.column = 2, + unique.features = TRUE, + strip.suffix = FALSE +) +} +\arguments{ +\item{mtx}{Name or remote URL of the mtx file} + +\item{cells}{Name or remote URL of the cells/barcodes file} + +\item{features}{Name or remote URL of the features/genes file} + +\item{cell.column}{Specify which column of cells file to use for cell names; default is 1} + +\item{unique.features}{Make feature names unique (default TRUE)} + +\item{strip.suffix}{Remove trailing "-1" if present in all cell barcodes.} + +\item{gene.column}{Specify which column of features files to use for gene names; default is 2} +} +\value{ +A sparse matrix containing the expression data. +} +\description{ +Enables easy loading of sparse data matrices +} +\examples{ +\dontrun{ +# For local files: + +expression_matrix <- ReadMtx(genes="count_matrix.mtx.gz", features="features.tsv.gz", cells="barcodes.tsv.gz") +seurat_object <- CreateSeuratObject(counts = expression_matrix) + +# For remote files: + +expression_matrix <- ReadMtx(mtx = "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE132044&format=file&file=GSE132044\%5Fmixture\%5Fhg19\%5Fmm10\%5Fcount\%5Fmatrix\%2Emtx\%2Egz", +cells ="https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE132044&format=file&file=GSE132044\%5Fmixture\%5Fhg19\%5Fmm10\%5Fcell\%2Etsv\%2Egz", +features = "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE132044&format=file&file=GSE132044\%5Fmixture\%5Fhg19\%5Fmm10\%5Fgene\%2Etsv\%2Egz", +feature.columns = 1) +seurat_object <- CreateSeuratObject(counts = data) +} + +} +\concept{preprocessing} From 8dbf8d64432de3868bd96ae46cdfae86b6f9390b Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Sat, 27 Feb 2021 15:05:26 -0500 Subject: [PATCH 02/20] Update doc --- R/preprocessing.R | 2 +- man/ReadMtx.Rd | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 5d69b6ed9..4cd09a775 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -739,7 +739,7 @@ MULTIseqDemux <- function( #' @param mtx Name or remote URL of the mtx file #' @param cells Name or remote URL of the cells/barcodes file #' @param features Name or remote URL of the features/genes file -#' @param gene.column Specify which column of features files to use for gene names; default is 2 +#' @param feature.column Specify which column of features files to use for feature/gene names; default is 2 #' @param cell.column Specify which column of cells file to use for cell names; default is 1 #' @param unique.features Make feature names unique (default TRUE) #' @param strip.suffix Remove trailing "-1" if present in all cell barcodes. diff --git a/man/ReadMtx.Rd b/man/ReadMtx.Rd index 47154332b..1a89ccbc6 100644 --- a/man/ReadMtx.Rd +++ b/man/ReadMtx.Rd @@ -23,11 +23,11 @@ ReadMtx( \item{cell.column}{Specify which column of cells file to use for cell names; default is 1} +\item{feature.column}{Specify which column of features files to use for feature/gene names; default is 2} + \item{unique.features}{Make feature names unique (default TRUE)} \item{strip.suffix}{Remove trailing "-1" if present in all cell barcodes.} - -\item{gene.column}{Specify which column of features files to use for gene names; default is 2} } \value{ A sparse matrix containing the expression data. From 63ba26f348ec1676f97c22d90ccdcffad734c25e Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Sat, 27 Feb 2021 15:15:22 -0500 Subject: [PATCH 03/20] Shorten doc example --- R/preprocessing.R | 7 +++---- man/ReadMtx.Rd | 7 +++---- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 4cd09a775..00492cdca 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -764,10 +764,9 @@ MULTIseqDemux <- function( #' #' # For remote files: #' -#' expression_matrix <- ReadMtx(mtx = "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE132044&format=file&file=GSE132044%5Fmixture%5Fhg19%5Fmm10%5Fcount%5Fmatrix%2Emtx%2Egz", -#' cells ="https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE132044&format=file&file=GSE132044%5Fmixture%5Fhg19%5Fmm10%5Fcell%2Etsv%2Egz", -#' features = "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE132044&format=file&file=GSE132044%5Fmixture%5Fhg19%5Fmm10%5Fgene%2Etsv%2Egz", -#' feature.columns = 1) +#' expression_matrix <- ReadMtx(mtx = "http://localhost/matrix.mtx", +#' cells = "http://localhost/barcodes.tsv", +#' features = "http://localhost/genes.tsv") #' seurat_object <- CreateSeuratObject(counts = data) #' } #' diff --git a/man/ReadMtx.Rd b/man/ReadMtx.Rd index 1a89ccbc6..65c9b8243 100644 --- a/man/ReadMtx.Rd +++ b/man/ReadMtx.Rd @@ -44,10 +44,9 @@ seurat_object <- CreateSeuratObject(counts = expression_matrix) # For remote files: -expression_matrix <- ReadMtx(mtx = "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE132044&format=file&file=GSE132044\%5Fmixture\%5Fhg19\%5Fmm10\%5Fcount\%5Fmatrix\%2Emtx\%2Egz", -cells ="https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE132044&format=file&file=GSE132044\%5Fmixture\%5Fhg19\%5Fmm10\%5Fcell\%2Etsv\%2Egz", -features = "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE132044&format=file&file=GSE132044\%5Fmixture\%5Fhg19\%5Fmm10\%5Fgene\%2Etsv\%2Egz", -feature.columns = 1) +expression_matrix <- ReadMtx(mtx = "http://localhost/matrix.mtx", +cells = "http://localhost/barcodes.tsv", +features = "http://localhost/genes.tsv") seurat_object <- CreateSeuratObject(counts = data) } From a17e100b7c1890d65d70f881d8c61f37214c5270 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Sat, 27 Feb 2021 15:33:39 -0500 Subject: [PATCH 04/20] Fix local file support --- R/preprocessing.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/preprocessing.R b/R/preprocessing.R index 00492cdca..20a14cd56 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -797,6 +797,7 @@ ReadMtx <- function(mtx, # convenience fucntion to read local or remote tab delimited files readTableUri <- function(uri){ if (grepl(pattern = '^:///', x = uri)) { + uri <- gsub(pattern = ":///", replacement = "", x = uri) textcontent <- read.table(file = uri, header = FALSE, sep = '\t', row.names = NULL) } else{ if (file_ext(uri)=="gz") { @@ -865,6 +866,7 @@ ReadMtx <- function(mtx, # read mtx if (grepl(pattern = '^:///', x = mtx)) { + mtx <- gsub(pattern = ":///", replacement = "", x = mtx) data <- readMM(mtx) } else { if (file_ext(mtx) == "gz"){ From e93aaeead598227874e96799a7d228d7078d3a01 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Thu, 4 Mar 2021 14:49:26 -0500 Subject: [PATCH 05/20] Reduce code duplication --- R/preprocessing.R | 371 +++++++++++++++++++++++++++------------------- R/utilities.R | 37 ++++- 2 files changed, 251 insertions(+), 157 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 20a14cd56..befa00d4f 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -731,159 +731,6 @@ MULTIseqDemux <- function( return(object) } - -#' Load in data from remote or local mtx files -#' -#' Enables easy loading of sparse data matrices -#' -#' @param mtx Name or remote URL of the mtx file -#' @param cells Name or remote URL of the cells/barcodes file -#' @param features Name or remote URL of the features/genes file -#' @param feature.column Specify which column of features files to use for feature/gene names; default is 2 -#' @param cell.column Specify which column of cells file to use for cell names; default is 1 -#' @param unique.features Make feature names unique (default TRUE) -#' @param strip.suffix Remove trailing "-1" if present in all cell barcodes. -#' -#' @return A sparse matrix containing the expression data. -#' -#' @importFrom Matrix readMM -#' @importFrom utils read.delim -#' @importFrom httr build_url parse_url -#' @importFrom tools file_ext -#' -#' -#' @export -#' @concept preprocessing -#' -#' @examples -#' \dontrun{ -#' # For local files: -#' -#' expression_matrix <- ReadMtx(genes="count_matrix.mtx.gz", features="features.tsv.gz", cells="barcodes.tsv.gz") -#' seurat_object <- CreateSeuratObject(counts = expression_matrix) -#' -#' # For remote files: -#' -#' expression_matrix <- ReadMtx(mtx = "http://localhost/matrix.mtx", -#' cells = "http://localhost/barcodes.tsv", -#' features = "http://localhost/genes.tsv") -#' seurat_object <- CreateSeuratObject(counts = data) -#' } -#' -ReadMtx <- function(mtx, - cells, - features, - cell.column = 1, - feature.column = 2, - unique.features = TRUE, - strip.suffix = FALSE){ - mtx <- build_url(url = parse_url(url = mtx)) - cells <- build_url(url = parse_url(url = cells)) - features <- build_url(url = parse_url(url = features)) - all_files <- list("Expression matrix" = mtx, "Barcode" = cells, "Gene name" = features) - - check_file_exists <- function(filetype, filepath){ - if (grepl(pattern = '^:///', x = filepath)) { - filepath <- gsub(pattern = ":///", replacement = "", x = filepath) - if (!file.exists(paths = filepath)) { - stop(paste(filetype, "file missing. Expecting", filepath), call. = FALSE) - } - } - } - - # check if all files exist - lapply(seq_along(all_files), function(y, n, i) { check_file_exists(n[[i]], y[[i]]) }, y=all_files, n=names(all_files)) - - # convenience fucntion to read local or remote tab delimited files - readTableUri <- function(uri){ - if (grepl(pattern = '^:///', x = uri)) { - uri <- gsub(pattern = ":///", replacement = "", x = uri) - textcontent <- read.table(file = uri, header = FALSE, sep = '\t', row.names = NULL) - } else{ - if (file_ext(uri)=="gz") { - textcontent <- read.table(file = gzcon(url(uri), text = TRUE), - header = FALSE, sep = '\t', row.names = NULL) - } else { - textcontent <- read.table(file = uri, header = FALSE, - sep = '\t', row.names = NULL) - } - } - return (textcontent) - } - - # read barcodes - cell.barcodes <- readTableUri(uri = cells) - bcols <- ncol(x = cell.barcodes) - if (bcols < cell.column) { - stop(paste0("cell.column was set to ", cell.column, - " but ", cells, " only has ", bcols, " columns.", - " Try setting the cell.column argument to a value <= to ", bcols, ".")) - } - cell.names <- cell.barcodes[, cell.column] - - if (all(grepl(pattern = "\\-1$", x = cell.names)) & strip.suffix) { - cell.names <- as.vector(x = as.character(x = sapply( - X = cell.names, - FUN = ExtractField, - field = 1, - delim = "-" - ))) - } - - # read features - feature.names <- readTableUri(uri = features) - fcols <- ncol(x = feature.names) - if (fcols < feature.column) { - stop(paste0("feature.column was set to ", feature.column, - " but ", features, " only has ", fcols, " column(s).", - " Try setting the feature.column argument to a value <= to ", fcols, ".")) - } - if (any(is.na(x = feature.names[, feature.column]))) { - na.features <- which(x = is.na(x = feature.names[, feature.column])) - replacement.column <- ifelse(test = feature.column == 2, yes = 1, no = 2) - if (replacement.column > fcols){ - stop( - paste0("Some features names are NA in column ", feature.column, - ". Try specifiying a different column."), - call. = FALSE, - immediate. = TRUE - ) - } else { - warning( - paste0("Some features names are NA in column ", feature.column, - ". Replacing NA names with ID from column ", replacement.column, "."), - call. = FALSE, - immediate. = TRUE - ) - } - feature.names[na.features, feature.column] <- feature.names[na.features, replacement.column] - } - - feature.names <- feature.names[, feature.column] - if (unique.features) { - feature.names <- make.unique(names = feature.names) - } - - # read mtx - if (grepl(pattern = '^:///', x = mtx)) { - mtx <- gsub(pattern = ":///", replacement = "", x = mtx) - data <- readMM(mtx) - } else { - if (file_ext(mtx) == "gz"){ - data <- readMM(gzcon(url(mtx))) - } else { - data <- readMM(mtx) - } - } - - colnames(x = data) <- cell.names - rownames(x = data) <- feature.names - - return (data) -} - - - #' Load in data from 10X #' #' Enables easy loading of sparse data matrices provided by 10X genomics. @@ -1181,6 +1028,224 @@ Read10X_Image <- function(image.dir, filter.matrix = TRUE, ...) { )) } +#' Load in data from remote or local mtx files +#' +#' Enables easy loading of sparse data matrices +#' +#' @param mtx Name or remote URL of the mtx file +#' @param cells Name or remote URL of the cells/barcodes file +#' @param features Name or remote URL of the features/genes file +#' @param feature.column Specify which column of features files to use for feature/gene names; default is 2 +#' @param cell.column Specify which column of cells file to use for cell names; default is 1 +#' @param unique.features Make feature names unique (default TRUE) +#' @param strip.suffix Remove trailing "-1" if present in all cell barcodes. +#' +#' @return A sparse matrix containing the expression data. +#' +#' @importFrom Matrix readMM +#' @importFrom utils read.delim +#' @importFrom httr build_url parse_url +#' @importFrom tools file_ext +#' +#' +#' @export +#' @concept preprocessing +#' +#' @examples +#' \dontrun{ +#' # For local files: +#' +#' expression_matrix <- ReadMtx(genes="count_matrix.mtx.gz", features="features.tsv.gz", cells="barcodes.tsv.gz") +#' seurat_object <- CreateSeuratObject(counts = expression_matrix) +#' +#' # For remote files: +#' +#' expression_matrix <- ReadMtx(mtx = "http://localhost/matrix.mtx", +#' cells = "http://localhost/barcodes.tsv", +#' features = "http://localhost/genes.tsv") +#' seurat_object <- CreateSeuratObject(counts = data) +#' } +#' +ReadMtx <- function( + mtx, + cells, + features, + cell.column = 1, + feature.column = 2, + unique.features = TRUE, + strip.suffix = FALSE +) { + all.files <- list( + "expression matrix" = mtx, + "barcode list" = cells, + "feature list" = features + ) + for (i in seq_along(along.with = all.files)) { + uri <- all.files[[i]] + err <- paste("Cannot find", names(x = all.files)[i], "at", uri) + uri <- build_url(url = parse_url(url = uri)) + if (grepl(pattern = '^:///', x = uri)) { + uri <- gsub(pattern = '^:///', replacement = '', x = uri) + if (!file.exists(uri)) { + stop(err, call. = FALSE) + } + } else { + if (!Online(url = uri, seconds = 2L)) { + stop(err, call. = FALSE) + } + if (file_ext(uri) == 'gz') { + uri <- gzcon(con = url(description = uri)) + on.exit(expr = close(con = uri), add = TRUE) + } + } + all.files[[i]] <- uri + } + cell.barcodes <- read.table( + file = all.files[['feature list']], + header = FALSE, + sep = '\t', + row.names = NULL + ) + feature.names <- read.table( + file = all.files[['barcode list']], + header = FALSE, + sep = '\t', + row.names = NULL + ) + mtx <- readMM(file = all.files[['expression matrix']]) + # mtx <- build_url(url = parse_url(url = mtx)) + # cells <- build_url(url = parse_url(url = cells)) + # features <- build_url(url = parse_url(url = features)) + # all_files <- list( + # "Expression matrix" = mtx, + # "Barcode" = cells, + # "Gene name" = features + # ) + # check_file_exists <- function(filetype, filepath){ + # if (grepl(pattern = '^:///', x = filepath)) { + # filepath <- gsub(pattern = ":///", replacement = "", x = filepath) + # if (!file.exists(paths = filepath)) { + # stop(filetype, " file missing. Expecting ", filepath, call. = FALSE) + # } + # } + # } + # # check if all files exist + # lapply( + # X = seq_along(all_files), + # FUN = function(y, n, i) { check_file_exists(n[[i]], y[[i]]) }, + # y = all_files, + # n = names(x = all_files) + # ) + # # convenience function to read local or remote tab delimited files + # readTableUri <- function(uri){ + # if (grepl(pattern = '^:///', x = uri)) { + # uri <- gsub(pattern = ":///", replacement = "", x = uri) + # textcontent <- read.table(file = uri, header = FALSE, sep = '\t', row.names = NULL) + # } else{ + # if (file_ext(uri) == "gz") { + # textcontent <- read.table( + # file = gzcon(url(uri), text = TRUE), + # header = FALSE, + # sep = '\t', + # row.names = NULL + # ) + # } else { + # textcontent <- read.table( + # file = uri, + # header = FALSE, + # sep = '\t', + # row.names = NULL + # ) + # } + # } + # return(textcontent) + # } + # # read barcodes + # cell.barcodes <- readTableUri(uri = cells) + bcols <- ncol(x = cell.barcodes) + if (bcols < cell.column) { + stop( + "cell.column was set to ", + cell.column, + " but ", + cells, + " only has ", + bcols, + " columns.", + " Try setting the cell.column argument to a value <= to ", + bcols, + "." + ) + } + cell.names <- cell.barcodes[, cell.column] + if (all(grepl(pattern = "\\-1$", x = cell.names)) & strip.suffix) { + cell.names <- as.vector(x = as.character(x = sapply( + X = cell.names, + FUN = ExtractField, + field = 1, + delim = "-" + ))) + } + # # read features + # feature.names <- readTableUri(uri = features) + fcols <- ncol(x = feature.names) + if (fcols < feature.column) { + stop( + "feature.column was set to ", + feature.column, + " but ", + features, + " only has ", + fcols, " column(s).", + " Try setting the feature.column argument to a value <= to ", + fcols, + "." + ) + } + if (any(is.na(x = feature.names[, feature.column]))) { + na.features <- which(x = is.na(x = feature.names[, feature.column])) + replacement.column <- ifelse(test = feature.column == 2, yes = 1, no = 2) + if (replacement.column > fcols) { + stop( + "Some features names are NA in column ", + feature.column, + ". Try specifiying a different column.", + call. = FALSE, + immediate. = TRUE + ) + } else { + warning( + "Some features names are NA in column ", + feature.column, + ". Replacing NA names with ID from column ", + replacement.column, + ".", + call. = FALSE, + immediate. = TRUE + ) + } + feature.names[na.features, feature.column] <- feature.names[na.features, replacement.column] + } + feature.names <- feature.names[, feature.column] + if (unique.features) { + feature.names <- make.unique(names = feature.names) + } + # # read mtx + # if (grepl(pattern = '^:///', x = mtx)) { + # mtx <- gsub(pattern = ":///", replacement = "", x = mtx) + # data <- readMM(mtx) + # } else { + # if (file_ext(mtx) == "gz") { + # data <- readMM(gzcon(url(mtx))) + # } else { + # data <- readMM(mtx) + # } + # } + colnames(x = data) <- cell.names + rownames(x = data) <- feature.names + return(data) +} + #' Load Slide-seq spatial data #' #' @param coord.file Path to csv file containing bead coordinate positions diff --git a/R/utilities.R b/R/utilities.R index fe227d1f7..410fccd86 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -1984,14 +1984,14 @@ ModifyParam <- function(param, value) { env2[[param]] <- value } -# Give hints for old paramters and their newer counterparts +# Give hints for old parameters and their newer counterparts # # This is a non-exhaustive list. If your function isn't working properly based # on the parameters you give it, please read the documentation for your function # -# @param param A vector of paramters to get hints for +# @param param A vector of parameters to get hints for # -# @return Parameter hints for the specified paramters +# @return Parameter hints for the specified parameters # OldParamHints <- function(param) { param.conversion <- c( @@ -2017,6 +2017,35 @@ OldParamHints <- function(param) { return(param.conversion[param]) } +# Check if a web resource is available +# +# @param url A URL +# @param strict Perform a strict web availability test +# @param seconds Timeout in seconds +# +# @return \code{TRUE} if \url{is available} otherwise \code{FALSE} +# +#' @importFrom httr GET status_code timeout +# +# @keywords internal +# +Online <- function(url, strict = FALSE, seconds = 5L) { + if (isTRUE(x = strict)) { + code <- 200L + comp <- identical + } else { + code <- 404L + comp <- Negate(f = identical) + } + request <- tryCatch( + expr = GET(url = url, timeout(seconds = seconds)), + error = function(...) { + return(404L) + } + ) + return(comp(x = status_code(x = request), y = code)) +} + # Check the existence of a package # # @param ... Package names @@ -2044,7 +2073,7 @@ PackageCheck <- function(..., error = TRUE) { # Parenting parameters from one environment to the next # -# This function allows one to modifiy a parameter in a parent environement +# This function allows one to modify a parameter in a parent environment # The primary use of this is to ensure logging functions store correct parameters # if they've been modified by a child function or method # From 2989d57e2ce3fd3cea7614b087ea85fe88e3d80a Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Thu, 4 Mar 2021 15:12:12 -0500 Subject: [PATCH 06/20] Fixes for Online --- R/utilities.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/utilities.R b/R/utilities.R index 410fccd86..d88aba0a4 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -2040,7 +2040,12 @@ Online <- function(url, strict = FALSE, seconds = 5L) { request <- tryCatch( expr = GET(url = url, timeout(seconds = seconds)), error = function(...) { - return(404L) + code <- if (grepl(pattern = 'recieved')) { + 408L + } else { + 404L + } + return(code) } ) return(comp(x = status_code(x = request), y = code)) From 67500daaf69dcaa131f684d8cf1220921123fea5 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Thu, 4 Mar 2021 15:15:41 -0500 Subject: [PATCH 07/20] Fixes for Online --- R/utilities.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/utilities.R b/R/utilities.R index d88aba0a4..287f07019 100644 --- a/R/utilities.R +++ b/R/utilities.R @@ -2039,8 +2039,8 @@ Online <- function(url, strict = FALSE, seconds = 5L) { } request <- tryCatch( expr = GET(url = url, timeout(seconds = seconds)), - error = function(...) { - code <- if (grepl(pattern = 'recieved')) { + error = function(err) { + code <- if (grepl(pattern = '^Timeout was reached', x = err$message)) { 408L } else { 404L From 08b3b20e6bd0dc63ee79680261f7ed74d53a0a4f Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 4 Mar 2021 15:49:55 -0500 Subject: [PATCH 08/20] Fix variable name for data --- R/preprocessing.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index befa00d4f..01a655572 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -1100,6 +1100,7 @@ ReadMtx <- function( } all.files[[i]] <- uri } + message(all.files) cell.barcodes <- read.table( file = all.files[['feature list']], header = FALSE, @@ -1112,7 +1113,7 @@ ReadMtx <- function( sep = '\t', row.names = NULL ) - mtx <- readMM(file = all.files[['expression matrix']]) + data <- readMM(file = all.files[['expression matrix']]) # mtx <- build_url(url = parse_url(url = mtx)) # cells <- build_url(url = parse_url(url = cells)) # features <- build_url(url = parse_url(url = features)) From 82e843b6a6acb4317617fdaf2e5e31f878b58403 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Thu, 4 Mar 2021 16:46:17 -0500 Subject: [PATCH 09/20] Fix swapped feature/cell, add text=T for gzcon --- R/preprocessing.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 01a655572..4547fe510 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -1094,26 +1094,24 @@ ReadMtx <- function( stop(err, call. = FALSE) } if (file_ext(uri) == 'gz') { - uri <- gzcon(con = url(description = uri)) + uri <- gzcon(con = url(description = uri), text = TRUE) on.exit(expr = close(con = uri), add = TRUE) } } all.files[[i]] <- uri } - message(all.files) cell.barcodes <- read.table( - file = all.files[['feature list']], + file = all.files[['barcode list']], header = FALSE, sep = '\t', row.names = NULL ) feature.names <- read.table( - file = all.files[['barcode list']], + file = all.files[['feature list']], header = FALSE, sep = '\t', row.names = NULL ) - data <- readMM(file = all.files[['expression matrix']]) # mtx <- build_url(url = parse_url(url = mtx)) # cells <- build_url(url = parse_url(url = cells)) # features <- build_url(url = parse_url(url = features)) @@ -1242,6 +1240,8 @@ ReadMtx <- function( # data <- readMM(mtx) # } # } + data <- readMM(file = all.files[['expression matrix']]) + colnames(x = data) <- cell.names rownames(x = data) <- feature.names return(data) From c8be04baa7e8637cdcd72fd8b26fc7c5c47018ed Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Fri, 5 Mar 2021 18:17:18 -0500 Subject: [PATCH 10/20] Remove on.exit as connections autoclose --- R/preprocessing.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 4547fe510..8e095f19f 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -1094,8 +1094,8 @@ ReadMtx <- function( stop(err, call. = FALSE) } if (file_ext(uri) == 'gz') { - uri <- gzcon(con = url(description = uri), text = TRUE) - on.exit(expr = close(con = uri), add = TRUE) + con <- url(description = uri) + uri <- gzcon(con = con, text = TRUE) } } all.files[[i]] <- uri From 2d4e1d27a702b9a49b38c61859726d0e16135f52 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Sat, 6 Mar 2021 00:31:47 -0500 Subject: [PATCH 11/20] Clean up --- R/preprocessing.R | 65 +++-------------------------------------------- 1 file changed, 3 insertions(+), 62 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 8e095f19f..b1323e2b9 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -1112,55 +1112,7 @@ ReadMtx <- function( sep = '\t', row.names = NULL ) - # mtx <- build_url(url = parse_url(url = mtx)) - # cells <- build_url(url = parse_url(url = cells)) - # features <- build_url(url = parse_url(url = features)) - # all_files <- list( - # "Expression matrix" = mtx, - # "Barcode" = cells, - # "Gene name" = features - # ) - # check_file_exists <- function(filetype, filepath){ - # if (grepl(pattern = '^:///', x = filepath)) { - # filepath <- gsub(pattern = ":///", replacement = "", x = filepath) - # if (!file.exists(paths = filepath)) { - # stop(filetype, " file missing. Expecting ", filepath, call. = FALSE) - # } - # } - # } - # # check if all files exist - # lapply( - # X = seq_along(all_files), - # FUN = function(y, n, i) { check_file_exists(n[[i]], y[[i]]) }, - # y = all_files, - # n = names(x = all_files) - # ) - # # convenience function to read local or remote tab delimited files - # readTableUri <- function(uri){ - # if (grepl(pattern = '^:///', x = uri)) { - # uri <- gsub(pattern = ":///", replacement = "", x = uri) - # textcontent <- read.table(file = uri, header = FALSE, sep = '\t', row.names = NULL) - # } else{ - # if (file_ext(uri) == "gz") { - # textcontent <- read.table( - # file = gzcon(url(uri), text = TRUE), - # header = FALSE, - # sep = '\t', - # row.names = NULL - # ) - # } else { - # textcontent <- read.table( - # file = uri, - # header = FALSE, - # sep = '\t', - # row.names = NULL - # ) - # } - # } - # return(textcontent) - # } - # # read barcodes - # cell.barcodes <- readTableUri(uri = cells) + # read barcodes bcols <- ncol(x = cell.barcodes) if (bcols < cell.column) { stop( @@ -1185,8 +1137,7 @@ ReadMtx <- function( delim = "-" ))) } - # # read features - # feature.names <- readTableUri(uri = features) + # read features fcols <- ncol(x = feature.names) if (fcols < feature.column) { stop( @@ -1229,17 +1180,7 @@ ReadMtx <- function( if (unique.features) { feature.names <- make.unique(names = feature.names) } - # # read mtx - # if (grepl(pattern = '^:///', x = mtx)) { - # mtx <- gsub(pattern = ":///", replacement = "", x = mtx) - # data <- readMM(mtx) - # } else { - # if (file_ext(mtx) == "gz") { - # data <- readMM(gzcon(url(mtx))) - # } else { - # data <- readMM(mtx) - # } - # } + data <- readMM(file = all.files[['expression matrix']]) colnames(x = data) <- cell.names From 8fc52a5f0a4fc4a8f96371436938a67fe9c811ca Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Sat, 6 Mar 2021 01:35:27 -0500 Subject: [PATCH 12/20] Add tests --- R/preprocessing.R | 42 +++++++++++++++++++++++++++------- man/ReadMtx.Rd | 6 +++++ tests/testthat/test_read_mtx.R | 32 ++++++++++++++++++++++++++ 3 files changed, 72 insertions(+), 8 deletions(-) create mode 100644 tests/testthat/test_read_mtx.R diff --git a/R/preprocessing.R b/R/preprocessing.R index b1323e2b9..cfac4118e 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -1037,6 +1037,8 @@ Read10X_Image <- function(image.dir, filter.matrix = TRUE, ...) { #' @param features Name or remote URL of the features/genes file #' @param feature.column Specify which column of features files to use for feature/gene names; default is 2 #' @param cell.column Specify which column of cells file to use for cell names; default is 1 +#' @param skip.cell Number of lines to skip in the cells file before beginning to read cell names +#' @param skip.feature Number of lines to skip in the features file before beginning to gene names #' @param unique.features Make feature names unique (default TRUE) #' @param strip.suffix Remove trailing "-1" if present in all cell barcodes. #' @@ -1072,6 +1074,8 @@ ReadMtx <- function( features, cell.column = 1, feature.column = 2, + skip.cell = 0, + skip.feature = 0, unique.features = TRUE, strip.suffix = FALSE ) { @@ -1104,13 +1108,15 @@ ReadMtx <- function( file = all.files[['barcode list']], header = FALSE, sep = '\t', - row.names = NULL + row.names = NULL, + skip = skip.cell ) feature.names <- read.table( file = all.files[['feature list']], header = FALSE, sep = '\t', - row.names = NULL + row.names = NULL, + skip = skip.feature ) # read barcodes bcols <- ncol(x = cell.barcodes) @@ -1160,9 +1166,8 @@ ReadMtx <- function( "Some features names are NA in column ", feature.column, ". Try specifiying a different column.", - call. = FALSE, - immediate. = TRUE - ) + call. = FALSE + ) } else { warning( "Some features names are NA in column ", @@ -1170,9 +1175,8 @@ ReadMtx <- function( ". Replacing NA names with ID from column ", replacement.column, ".", - call. = FALSE, - immediate. = TRUE - ) + call. = FALSE + ) } feature.names[na.features, feature.column] <- feature.names[na.features, replacement.column] } @@ -1183,8 +1187,30 @@ ReadMtx <- function( data <- readMM(file = all.files[['expression matrix']]) + if (length(cell.names)!=ncol(data)){ + stop( + "Matrix has ", + ncol(data), + " columns but found ", length(cell.names), + " barcodes. ", + ifelse(test = length(cell.names) > ncol(data), yes = "Try increasing `skip.cell`. ", no = ""), + call. = FALSE + ) + } + if (length(feature.names)!=nrow(data)){ + stop( + "Matrix has ", + ncol(data), + " rows but found ", length(feature.names), + " features. ", + ifelse(test = length(feature.names) > nrow(data), yes = "Try increasing `skip.feature`. ", no = ""), + call. = FALSE + ) + } + colnames(x = data) <- cell.names rownames(x = data) <- feature.names + data <- as(data, Class="dgCMatrix") return(data) } diff --git a/man/ReadMtx.Rd b/man/ReadMtx.Rd index 65c9b8243..c11d20426 100644 --- a/man/ReadMtx.Rd +++ b/man/ReadMtx.Rd @@ -10,6 +10,8 @@ ReadMtx( features, cell.column = 1, feature.column = 2, + skip.cell = 0, + skip.feature = 0, unique.features = TRUE, strip.suffix = FALSE ) @@ -25,6 +27,10 @@ ReadMtx( \item{feature.column}{Specify which column of features files to use for feature/gene names; default is 2} +\item{skip.cell}{Number of lines to skip in the cells file before beginning to read cell names} + +\item{skip.feature}{Number of lines to skip in the features file before beginning to gene names} + \item{unique.features}{Make feature names unique (default TRUE)} \item{strip.suffix}{Remove trailing "-1" if present in all cell barcodes.} diff --git a/tests/testthat/test_read_mtx.R b/tests/testthat/test_read_mtx.R new file mode 100644 index 000000000..7c8dc1615 --- /dev/null +++ b/tests/testthat/test_read_mtx.R @@ -0,0 +1,32 @@ +context("ReadMtx") + +mtx <- "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE126nnn/GSE126836/suppl/GSE126836_SN_MD5828_matrix.mtx.gz" +features <- "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE126nnn/GSE126836/suppl/GSE126836_SN_MD5828_genes.csv.gz" +cells <- "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE126nnn/GSE126836/suppl/GSE126836_SN_MD5828_barcodes.csv.gz" +counts1 <- ReadMtx(mtx = mtx, cells = cells, features = features, feature.column = 1, skip.cell = 1, skip.feature = 1) + + +mtx <- "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE132044&format=file&file=GSE132044%5Fmixture%5Fhg19%5Fmm10%5Fcount%5Fmatrix%2Emtx%2Egz" +cells <- "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE132044&format=file&file=GSE132044%5Fmixture%5Fhg19%5Fmm10%5Fcell%2Etsv%2Egz" +features <- "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE132044&format=file&file=GSE132044%5Fmixture%5Fhg19%5Fmm10%5Fgene%2Etsv%2Egz" +counts2 <- ReadMtx(mtx = mtx, cells = cells, features = features, feature.column = 1) + + +test_that("skip.cell and skip.feature work", { + expect_is(counts1, "dgCMatrix") + expect_equal(ncol(counts1), 1436) + expect_equal(nrow(counts1), 29445) + expect_equal(colnames(counts1)[5], "MD5828a_GGGCATCCAATGAAAC-1") + expect_equal(rownames(counts1)[2], "A1BG-AS1") +}) + + +test_that("ReadMtx works", { + expect_is(counts2, "dgCMatrix") + expect_equal(ncol(counts2), 27714) + expect_equal(nrow(counts2), 62046) + expect_equal(colnames(counts2)[1], "Mixture1.Smart-seq2.p2_A4") + expect_equal(rownames(counts2)[2], "hg19_ENSG00000000003_hg19_TSPAN6") +}) + + From 94411859e80a1a94131bbea0ba5a6ea949fffe23 Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Sat, 6 Mar 2021 14:20:30 -0500 Subject: [PATCH 13/20] Swap test --- tests/testthat/test_read_mtx.R | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/testthat/test_read_mtx.R b/tests/testthat/test_read_mtx.R index 7c8dc1615..5618d5a8d 100644 --- a/tests/testthat/test_read_mtx.R +++ b/tests/testthat/test_read_mtx.R @@ -6,9 +6,9 @@ cells <- "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE126nnn/GSE126836/suppl/GSE126 counts1 <- ReadMtx(mtx = mtx, cells = cells, features = features, feature.column = 1, skip.cell = 1, skip.feature = 1) -mtx <- "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE132044&format=file&file=GSE132044%5Fmixture%5Fhg19%5Fmm10%5Fcount%5Fmatrix%2Emtx%2Egz" -cells <- "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE132044&format=file&file=GSE132044%5Fmixture%5Fhg19%5Fmm10%5Fcell%2Etsv%2Egz" -features <- "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE132044&format=file&file=GSE132044%5Fmixture%5Fhg19%5Fmm10%5Fgene%2Etsv%2Egz" +mtx <- "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE127774&format=file&file=GSE127774%5FACC%5FB%5Fmatrix%2Emtx%2Egz" +cells <- "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE127774&format=file&file=GSE127774%5FACC%5FB%5Fbarcodes%2Etsv%2Egz" +features <- "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE127774&format=file&file=GSE127774%5FACC%5FB%5Fgenes%2Etsv%2Egz" counts2 <- ReadMtx(mtx = mtx, cells = cells, features = features, feature.column = 1) @@ -23,10 +23,10 @@ test_that("skip.cell and skip.feature work", { test_that("ReadMtx works", { expect_is(counts2, "dgCMatrix") - expect_equal(ncol(counts2), 27714) - expect_equal(nrow(counts2), 62046) - expect_equal(colnames(counts2)[1], "Mixture1.Smart-seq2.p2_A4") - expect_equal(rownames(counts2)[2], "hg19_ENSG00000000003_hg19_TSPAN6") + expect_equal(ncol(counts2), 22063) + expect_equal(nrow(counts2), 22530) + expect_equal(colnames(counts2)[1], "AAACCTGAGCAATCTC-1") + expect_equal(rownames(counts2)[2], "ENSPPAG00000006288") }) From bb52f35df8318efdf4d1e4e1a311e529f0bddbcc Mon Sep 17 00:00:00 2001 From: Saket Choudhary Date: Sat, 6 Mar 2021 14:46:28 -0500 Subject: [PATCH 14/20] Fix test --- tests/testthat/test_read_mtx.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test_read_mtx.R b/tests/testthat/test_read_mtx.R index 5618d5a8d..f50bd429b 100644 --- a/tests/testthat/test_read_mtx.R +++ b/tests/testthat/test_read_mtx.R @@ -26,7 +26,7 @@ test_that("ReadMtx works", { expect_equal(ncol(counts2), 22063) expect_equal(nrow(counts2), 22530) expect_equal(colnames(counts2)[1], "AAACCTGAGCAATCTC-1") - expect_equal(rownames(counts2)[2], "ENSPPAG00000006288") + expect_equal(rownames(counts2)[2], "ENSPPAG00000040697") }) From 539d168856555cdc588edda60c5a3d76f4783107 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 15 Mar 2021 17:10:17 -0400 Subject: [PATCH 15/20] Style fixes --- R/preprocessing.R | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 2bf87e65e..6a3c7938c 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -1184,33 +1184,39 @@ ReadMtx <- function( if (unique.features) { feature.names <- make.unique(names = feature.names) } - data <- readMM(file = all.files[['expression matrix']]) - - if (length(cell.names)!=ncol(data)){ + if (length(x = cell.names) != ncol(x = data)) { stop( "Matrix has ", ncol(data), " columns but found ", length(cell.names), " barcodes. ", - ifelse(test = length(cell.names) > ncol(data), yes = "Try increasing `skip.cell`. ", no = ""), + ifelse( + test = length(x = cell.names) > ncol(x = data), + yes = "Try increasing `skip.cell`. ", + no = "" + ), call. = FALSE ) } - if (length(feature.names)!=nrow(data)){ + if (length(x = feature.names) != nrow(x = data)) { stop( "Matrix has ", ncol(data), " rows but found ", length(feature.names), " features. ", - ifelse(test = length(feature.names) > nrow(data), yes = "Try increasing `skip.feature`. ", no = ""), + ifelse( + test = length(x = feature.names) > nrow(x = data), + yes = "Try increasing `skip.feature`. ", + no = "" + ), call. = FALSE ) } colnames(x = data) <- cell.names rownames(x = data) <- feature.names - data <- as(data, Class="dgCMatrix") + data <- as(data, Class = "dgCMatrix") return(data) } From 06b6333dccd7346743f78d5f61e0c50df596d943 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 15 Mar 2021 17:10:33 -0400 Subject: [PATCH 16/20] devtools::document updated this --- man/as.sparse.Rd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/man/as.sparse.Rd b/man/as.sparse.Rd index b0cdaae48..1872e4fe8 100644 --- a/man/as.sparse.Rd +++ b/man/as.sparse.Rd @@ -20,8 +20,8 @@ \item{...}{Arguments passed to other methods} -\item{row.names}{\code{NULL} or a character vector giving the row - names for the data frame. Missing values are not allowed.} +\item{row.names}{\code{NULL} or a character vector giving the row names for +the data; missing values are not allowed} \item{optional}{logical. If \code{TRUE}, setting row names and converting column names (to syntactic names: see From 3bf40b6301d25676aedab4e24dddbf3a3afd68be Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 15 Mar 2021 17:11:38 -0400 Subject: [PATCH 17/20] Minor fixes --- R/preprocessing.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/preprocessing.R b/R/preprocessing.R index 6a3c7938c..2408cd698 100644 --- a/R/preprocessing.R +++ b/R/preprocessing.R @@ -1035,8 +1035,8 @@ Read10X_Image <- function(image.dir, filter.matrix = TRUE, ...) { #' @param mtx Name or remote URL of the mtx file #' @param cells Name or remote URL of the cells/barcodes file #' @param features Name or remote URL of the features/genes file -#' @param feature.column Specify which column of features files to use for feature/gene names; default is 2 #' @param cell.column Specify which column of cells file to use for cell names; default is 1 +#' @param feature.column Specify which column of features files to use for feature/gene names; default is 2 #' @param skip.cell Number of lines to skip in the cells file before beginning to read cell names #' @param skip.feature Number of lines to skip in the features file before beginning to gene names #' @param unique.features Make feature names unique (default TRUE) From 4aa73fcc4e9f685a5202dcafd0401758cc04b519 Mon Sep 17 00:00:00 2001 From: Paul Hoffman Date: Mon, 15 Mar 2021 17:20:08 -0400 Subject: [PATCH 18/20] Attempt to trigger GH Actions From c5ad8a7b5b0b77cc9e2b4208661f5cdc456d3bac Mon Sep 17 00:00:00 2001 From: Andrew Date: Tue, 16 Mar 2021 00:12:48 -0400 Subject: [PATCH 19/20] bump version, update NEWS --- DESCRIPTION | 2 +- NEWS.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 7e7574c8d..1a8a7c042 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: Seurat -Version: 4.0.0.9014 +Version: 4.0.0.9015 Date: 2021-03-15 Title: Tools for Single Cell Genomics Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) , Macosko E, Basu A, Satija R, et al (2015) , Stuart T, Butler A, et al (2019) , and Hao, Hao, et al (2020) for more details. diff --git a/NEWS.md b/NEWS.md index ce1046e89..71043ae1a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,7 @@ ## Added - Add direction option to `PlotClusterTree()` - Add `cols` parameter to `JackStrawPlot()` +- Add `ReadMtx()` to read local and remote mtx files with associated cell and feature name files ## Changes - Equality added to differential expression thresholds in `FindMarkers` (e.g, >= logfc.threshold rather than >) From d15f7d92db1a4b6a1d56aadf8cc51f4d11034785 Mon Sep 17 00:00:00 2001 From: Andrew Date: Tue, 16 Mar 2021 16:06:51 -0400 Subject: [PATCH 20/20] skip ReadMtx tests on CRAN --- tests/testthat/test_read_mtx.R | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/tests/testthat/test_read_mtx.R b/tests/testthat/test_read_mtx.R index f50bd429b..75707decc 100644 --- a/tests/testthat/test_read_mtx.R +++ b/tests/testthat/test_read_mtx.R @@ -1,18 +1,11 @@ context("ReadMtx") -mtx <- "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE126nnn/GSE126836/suppl/GSE126836_SN_MD5828_matrix.mtx.gz" -features <- "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE126nnn/GSE126836/suppl/GSE126836_SN_MD5828_genes.csv.gz" -cells <- "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE126nnn/GSE126836/suppl/GSE126836_SN_MD5828_barcodes.csv.gz" -counts1 <- ReadMtx(mtx = mtx, cells = cells, features = features, feature.column = 1, skip.cell = 1, skip.feature = 1) - - -mtx <- "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE127774&format=file&file=GSE127774%5FACC%5FB%5Fmatrix%2Emtx%2Egz" -cells <- "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE127774&format=file&file=GSE127774%5FACC%5FB%5Fbarcodes%2Etsv%2Egz" -features <- "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE127774&format=file&file=GSE127774%5FACC%5FB%5Fgenes%2Etsv%2Egz" -counts2 <- ReadMtx(mtx = mtx, cells = cells, features = features, feature.column = 1) - - test_that("skip.cell and skip.feature work", { + skip_on_cran() + mtx <- "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE126nnn/GSE126836/suppl/GSE126836_SN_MD5828_matrix.mtx.gz" + features <- "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE126nnn/GSE126836/suppl/GSE126836_SN_MD5828_genes.csv.gz" + cells <- "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE126nnn/GSE126836/suppl/GSE126836_SN_MD5828_barcodes.csv.gz" + counts1 <- ReadMtx(mtx = mtx, cells = cells, features = features, feature.column = 1, skip.cell = 1, skip.feature = 1) expect_is(counts1, "dgCMatrix") expect_equal(ncol(counts1), 1436) expect_equal(nrow(counts1), 29445) @@ -22,6 +15,11 @@ test_that("skip.cell and skip.feature work", { test_that("ReadMtx works", { + skip_on_cran() + mtx <- "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE127774&format=file&file=GSE127774%5FACC%5FB%5Fmatrix%2Emtx%2Egz" + cells <- "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE127774&format=file&file=GSE127774%5FACC%5FB%5Fbarcodes%2Etsv%2Egz" + features <- "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE127774&format=file&file=GSE127774%5FACC%5FB%5Fgenes%2Etsv%2Egz" + counts2 <- ReadMtx(mtx = mtx, cells = cells, features = features, feature.column = 1) expect_is(counts2, "dgCMatrix") expect_equal(ncol(counts2), 22063) expect_equal(nrow(counts2), 22530)