Skip to content

Commit

Permalink
Merge pull request #538 from satijalab/feat/ReadMtx
Browse files Browse the repository at this point in the history
Add ReadMtx() to read local and remote mtx files
  • Loading branch information
andrewwbutler authored Mar 16, 2021
2 parents 855e271 + d15f7d9 commit 7787dde
Show file tree
Hide file tree
Showing 8 changed files with 327 additions and 7 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Package: Seurat
Version: 4.0.0.9014
Version: 4.0.0.9015
Date: 2021-03-15
Title: Tools for Single Cell Genomics
Description: A toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. 'Seurat' aims to enable users to identify and interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse types of single cell data. See Satija R, Farrell J, Gennert D, et al (2015) <doi:10.1038/nbt.3192>, Macosko E, Basu A, Satija R, et al (2015) <doi:10.1016/j.cell.2015.05.002>, Stuart T, Butler A, et al (2019) <doi:10.1016/j.cell.2019.05.031>, and Hao, Hao, et al (2020) <doi:10.1101/2020.10.12.335331> for more details.
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,7 @@ export(Radius)
export(Read10X)
export(Read10X_Image)
export(Read10X_h5)
export(ReadMtx)
export(ReadSlideSeq)
export(Reductions)
export(RegroupIdents)
Expand Down Expand Up @@ -556,7 +557,9 @@ importFrom(grid,unit)
importFrom(grid,viewport)
importFrom(httr,GET)
importFrom(httr,accept_json)
importFrom(httr,build_url)
importFrom(httr,content)
importFrom(httr,parse_url)
importFrom(httr,status_code)
importFrom(httr,timeout)
importFrom(ica,icafast)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## Added
- Add direction option to `PlotClusterTree()`
- Add `cols` parameter to `JackStrawPlot()`
- Add `ReadMtx()` to read local and remote mtx files with associated cell and feature name files

## Changes
- Equality added to differential expression thresholds in `FindMarkers` (e.g, >= logfc.threshold rather than >)
Expand Down
192 changes: 192 additions & 0 deletions R/preprocessing.R
Original file line number Diff line number Diff line change
Expand Up @@ -1028,6 +1028,198 @@ Read10X_Image <- function(image.dir, filter.matrix = TRUE, ...) {
))
}

#' Load in data from remote or local mtx files
#'
#' Enables easy loading of sparse data matrices
#'
#' @param mtx Name or remote URL of the mtx file
#' @param cells Name or remote URL of the cells/barcodes file
#' @param features Name or remote URL of the features/genes file
#' @param cell.column Specify which column of cells file to use for cell names; default is 1
#' @param feature.column Specify which column of features files to use for feature/gene names; default is 2
#' @param skip.cell Number of lines to skip in the cells file before beginning to read cell names
#' @param skip.feature Number of lines to skip in the features file before beginning to gene names
#' @param unique.features Make feature names unique (default TRUE)
#' @param strip.suffix Remove trailing "-1" if present in all cell barcodes.
#'
#' @return A sparse matrix containing the expression data.
#'
#' @importFrom Matrix readMM
#' @importFrom utils read.delim
#' @importFrom httr build_url parse_url
#' @importFrom tools file_ext
#'
#'
#' @export
#' @concept preprocessing
#'
#' @examples
#' \dontrun{
#' # For local files:
#'
#' expression_matrix <- ReadMtx(genes="count_matrix.mtx.gz", features="features.tsv.gz", cells="barcodes.tsv.gz")
#' seurat_object <- CreateSeuratObject(counts = expression_matrix)
#'
#' # For remote files:
#'
#' expression_matrix <- ReadMtx(mtx = "http://localhost/matrix.mtx",
#' cells = "http://localhost/barcodes.tsv",
#' features = "http://localhost/genes.tsv")
#' seurat_object <- CreateSeuratObject(counts = data)
#' }
#'
ReadMtx <- function(
mtx,
cells,
features,
cell.column = 1,
feature.column = 2,
skip.cell = 0,
skip.feature = 0,
unique.features = TRUE,
strip.suffix = FALSE
) {
all.files <- list(
"expression matrix" = mtx,
"barcode list" = cells,
"feature list" = features
)
for (i in seq_along(along.with = all.files)) {
uri <- all.files[[i]]
err <- paste("Cannot find", names(x = all.files)[i], "at", uri)
uri <- build_url(url = parse_url(url = uri))
if (grepl(pattern = '^:///', x = uri)) {
uri <- gsub(pattern = '^:///', replacement = '', x = uri)
if (!file.exists(uri)) {
stop(err, call. = FALSE)
}
} else {
if (!Online(url = uri, seconds = 2L)) {
stop(err, call. = FALSE)
}
if (file_ext(uri) == 'gz') {
con <- url(description = uri)
uri <- gzcon(con = con, text = TRUE)
}
}
all.files[[i]] <- uri
}
cell.barcodes <- read.table(
file = all.files[['barcode list']],
header = FALSE,
sep = '\t',
row.names = NULL,
skip = skip.cell
)
feature.names <- read.table(
file = all.files[['feature list']],
header = FALSE,
sep = '\t',
row.names = NULL,
skip = skip.feature
)
# read barcodes
bcols <- ncol(x = cell.barcodes)
if (bcols < cell.column) {
stop(
"cell.column was set to ",
cell.column,
" but ",
cells,
" only has ",
bcols,
" columns.",
" Try setting the cell.column argument to a value <= to ",
bcols,
"."
)
}
cell.names <- cell.barcodes[, cell.column]
if (all(grepl(pattern = "\\-1$", x = cell.names)) & strip.suffix) {
cell.names <- as.vector(x = as.character(x = sapply(
X = cell.names,
FUN = ExtractField,
field = 1,
delim = "-"
)))
}
# read features
fcols <- ncol(x = feature.names)
if (fcols < feature.column) {
stop(
"feature.column was set to ",
feature.column,
" but ",
features,
" only has ",
fcols, " column(s).",
" Try setting the feature.column argument to a value <= to ",
fcols,
"."
)
}
if (any(is.na(x = feature.names[, feature.column]))) {
na.features <- which(x = is.na(x = feature.names[, feature.column]))
replacement.column <- ifelse(test = feature.column == 2, yes = 1, no = 2)
if (replacement.column > fcols) {
stop(
"Some features names are NA in column ",
feature.column,
". Try specifiying a different column.",
call. = FALSE
)
} else {
warning(
"Some features names are NA in column ",
feature.column,
". Replacing NA names with ID from column ",
replacement.column,
".",
call. = FALSE
)
}
feature.names[na.features, feature.column] <- feature.names[na.features, replacement.column]
}
feature.names <- feature.names[, feature.column]
if (unique.features) {
feature.names <- make.unique(names = feature.names)
}
data <- readMM(file = all.files[['expression matrix']])
if (length(x = cell.names) != ncol(x = data)) {
stop(
"Matrix has ",
ncol(data),
" columns but found ", length(cell.names),
" barcodes. ",
ifelse(
test = length(x = cell.names) > ncol(x = data),
yes = "Try increasing `skip.cell`. ",
no = ""
),
call. = FALSE
)
}
if (length(x = feature.names) != nrow(x = data)) {
stop(
"Matrix has ",
ncol(data),
" rows but found ", length(feature.names),
" features. ",
ifelse(
test = length(x = feature.names) > nrow(x = data),
yes = "Try increasing `skip.feature`. ",
no = ""
),
call. = FALSE
)
}

colnames(x = data) <- cell.names
rownames(x = data) <- feature.names
data <- as(data, Class = "dgCMatrix")
return(data)
}

#' Load Slide-seq spatial data
#'
#' @param coord.file Path to csv file containing bead coordinate positions
Expand Down
42 changes: 38 additions & 4 deletions R/utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -1984,14 +1984,14 @@ ModifyParam <- function(param, value) {
env2[[param]] <- value
}

# Give hints for old paramters and their newer counterparts
# Give hints for old parameters and their newer counterparts
#
# This is a non-exhaustive list. If your function isn't working properly based
# on the parameters you give it, please read the documentation for your function
#
# @param param A vector of paramters to get hints for
# @param param A vector of parameters to get hints for
#
# @return Parameter hints for the specified paramters
# @return Parameter hints for the specified parameters
#
OldParamHints <- function(param) {
param.conversion <- c(
Expand All @@ -2017,6 +2017,40 @@ OldParamHints <- function(param) {
return(param.conversion[param])
}

# Check if a web resource is available
#
# @param url A URL
# @param strict Perform a strict web availability test
# @param seconds Timeout in seconds
#
# @return \code{TRUE} if \url{is available} otherwise \code{FALSE}
#
#' @importFrom httr GET status_code timeout
#
# @keywords internal
#
Online <- function(url, strict = FALSE, seconds = 5L) {
if (isTRUE(x = strict)) {
code <- 200L
comp <- identical
} else {
code <- 404L
comp <- Negate(f = identical)
}
request <- tryCatch(
expr = GET(url = url, timeout(seconds = seconds)),
error = function(err) {
code <- if (grepl(pattern = '^Timeout was reached', x = err$message)) {
408L
} else {
404L
}
return(code)
}
)
return(comp(x = status_code(x = request), y = code))
}

# Check the existence of a package
#
# @param ... Package names
Expand Down Expand Up @@ -2044,7 +2078,7 @@ PackageCheck <- function(..., error = TRUE) {

# Parenting parameters from one environment to the next
#
# This function allows one to modifiy a parameter in a parent environement
# This function allows one to modify a parameter in a parent environment
# The primary use of this is to ensure logging functions store correct parameters
# if they've been modified by a child function or method
#
Expand Down
60 changes: 60 additions & 0 deletions man/ReadMtx.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/as.sparse.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 7787dde

Please sign in to comment.