-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy path03-save-coldata.R
76 lines (60 loc) · 2.03 KB
/
03-save-coldata.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/env Rscript
# This script is used to grab the colData from a SCE object and save it as a TSV file
library(optparse)
# Command line options: the input SCE file and the path for the output TSV
option_list <- list(
  make_option(
    "--sce_file",
    type = "character",
    help = "Path to RDS file containing a processed SingleCellExperiment object from scpca-nf"
  ),
  make_option(
    "--output_file",
    type = "character",
    help = "Path to file where colData will be saved, must end in `.tsv`"
  )
)

# Build the parser from the option list, then parse the command line arguments
opt <- OptionParser(option_list = option_list) |>
  parse_args()
# Set up -----------------------------------------------------------------------
# make sure required arguments were provided and that the input file exists
# an option that is not supplied on the command line is NULL, and
# `file.exists()` does not give an informative failure for NULL, so check
# for missing arguments explicitly before checking the file itself
# (conditions are checked in order and the first failure is reported)
stopifnot(
  "--sce_file must be provided" = !is.null(opt$sce_file),
  "--output_file must be provided" = !is.null(opt$output_file),
  "sce file does not exist" = file.exists(opt$sce_file)
)
# load the SingleCellExperiment package, suppressing its startup messages
suppressPackageStartupMessages({
library(SingleCellExperiment)
})
# Extract colData --------------------------------------------------------------
# read in sce
sce <- readr::read_rds(opt$sce_file)

# extract ids
library_id <- metadata(sce)$library_id
# account for multiplexed libraries that have multiple samples
# for now just combine sample ids into a single string and don't worry about demultiplexing
sample_id <- metadata(sce)$sample_id |>
  paste0(collapse = ";")
project_id <- metadata(sce)$project_id

# check if cell line since cell lines don't have any cell type assignments
# account for having more than one sample and a list of sample types
# all sample types should be the same theoretically
sample_types <- unlist(metadata(sce)$sample_type)
# only treat the library as a cell line when sample types are actually present
# and every one of them is "cell line":
# - `all()` of an empty vector is TRUE, so missing `sample_type` metadata would
#   otherwise be mistaken for a cell line and the output silently skipped
# - `%in%` never returns NA, so an NA sample type can't break the later `if()`
is_cell_line <- length(sample_types) > 0 &&
  all(sample_types %in% "cell line")
# cell line samples are skipped entirely, so no table is written for them
if (!is_cell_line) {
  # convert the colData to a data frame and attach the ids as columns
  coldata_df <- as.data.frame(colData(sce))
  annotated_df <- dplyr::mutate(
    coldata_df,
    project_id = project_id,
    sample_id = sample_id,
    library_id = library_id
  )

  # keep the ids, barcodes, and every column with "celltype" in its name
  # (gets both singler and cellassign with ontology)
  celltype_df <- dplyr::select(
    annotated_df,
    project_id,
    sample_id,
    library_id,
    barcodes,
    dplyr::contains("celltype")
  )

  # save tsv
  readr::write_tsv(celltype_df, opt$output_file)
}