Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Modify Read10X for GEO compatibility #4101

Merged
merged 1 commit into from
Feb 27, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion R/preprocessing.R
Original file line number Diff line number Diff line change
Expand Up @@ -739,6 +739,10 @@ MULTIseqDemux <- function(
#' files provided by 10X. A vector or named vector can be given in order to load
#' several data directories. If a named vector is given, the cell barcode names
#' will be prefixed with the name.
#' @param file.prefix.geo File name prefix that precedes the barcodes, features, and matrix file names.
#' Useful for importing files downloaded from a public repository (e.g. NCBI GEO) without renaming them.
#' All three files must share the same prefix and must remain gzip-compressed with the .gz suffix, as
#' downloaded from GEO. Make sure to include the trailing "_" in the prefix.
#' @param gene.column Specify which column of genes.tsv or features.tsv to use for gene names; default is 2
#' @param cell.column Specify which column of barcodes.tsv to use for cell names; default is 1
#' @param unique.features Make feature names unique (default TRUE)
Expand Down Expand Up @@ -768,10 +772,18 @@ MULTIseqDemux <- function(
#' data <- Read10X(data.dir = data_dir)
#' seurat_object = CreateSeuratObject(counts = data$`Gene Expression`)
#' seurat_object[['Protein']] = CreateAssayObject(counts = data$`Antibody Capture`)
#'
#' # For files downloaded from NCBI GEO with file prefixes
#' data_dir <- 'path/to/data/directory'
#' list.files(data_dir) # Should show barcodes.tsv.gz, genes.tsv.gz (or features.tsv.gz), and matrix.mtx.gz, each with the additional prefix from the GEO record
#' expression_matrix <- Read10X(data.dir = data_dir, file.prefix.geo = "GSM_XXXXXX_NAME_")
#' seurat_object = CreateSeuratObject(counts = expression_matrix)
#'
#' }
#'
Read10X <- function(
data.dir = NULL,
file.prefix.geo = NULL,
gene.column = 2,
cell.column = 1,
unique.features = TRUE,
Expand All @@ -787,9 +799,17 @@ Read10X <- function(
gene.loc <- file.path(run, 'genes.tsv')
features.loc <- file.path(run, 'features.tsv.gz')
matrix.loc <- file.path(run, 'matrix.mtx')
# Import files whose names carry a prefix in front of barcodes, features, and matrix (e.g. files downloaded from NCBI GEO).
prefix <- file.prefix.geo[i]
if (!is.null(x = file.prefix.geo)) {
barcode.loc <- paste0(run, prefix,'barcodes.tsv.gz')
gene.loc <- paste0(run, prefix,'genes.tsv.gz')
features.loc <- paste0(run, prefix,'features.tsv.gz')
matrix.loc <- paste0(run, prefix,'matrix.mtx.gz')
}
# Flag to indicate if this data is from CellRanger >= 3.0
pre_ver_3 <- file.exists(gene.loc)
if (!pre_ver_3) {
if (!pre_ver_3 && is.null(x = file.prefix.geo)) {
addgz <- function(s) {
return(paste0(s, ".gz"))
}
Expand Down