-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathSCopeLoomR_tutorial.Rmd
346 lines (276 loc) · 10.8 KB
/
SCopeLoomR_tutorial.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
---
title: "SCopeLoomR Tutorial - Creating and reading .loom files"
package: r pkg_ver('SCopeLoomR')
output:
html_notebook:
toc: yes
html_document:
keep_md: true
df_print: paged
toc: yes
toc_float: yes
BiocStyle::html_document:
number_sections: no
pdf_document:
toc: yes
vignette: |
%\VignetteIndexEntry{SCopeLoomR}
%\VignetteEncoding{UTF-8}
%\VignetteEngine{knitr::rmarkdown}
---
```{r setup, include=FALSE, echo=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(SCopeLoomR)
library(SingleCellExperiment)
```
*HTML built on `r format(Sys.time(), "%b %d, %Y")` with SCopeLoomR **version `r packageVersion("SCopeLoomR")`***.
# Introduction
The .loom file format is designed to store very large omics datasets. It has been created is maintained by the [Linnarsson lab](http://linnarssonlab.org/), and its naming conventions are described in [loompy](http://linnarssonlab.org/loompy/conventions/index.html)
`SCopeLoomR` is an R package to easily create and manipulate .loom files. These loom files are compatible with [SCope](http://scope.aertslab.org), a loom viewer which also allows to visualize the results from [SCENIC](http://scenic.aertslab.org) (e.g.: regulon, regulon activities).
# Installation
For example:
```{r install, eval=FALSE}
#install.packages("devtools")
devtools::install_github("aertslab/SCopeLoomR")
```
# Creating a loom object
## Load Data
Load the data to include in the loom file:
```{r loadData}
library(SingleCellExperiment)
library(SCopeLoomR)
data(sce) # e.g. a SingleCellExperiment
# DGEM (Digital gene expression matrix)
dgem <- counts(sce)
dim(dgem)
class(dgem)
head(colnames(dgem)) #should contain the Cell ID/name
# Known cell information/annotation
cell.info <- colData(sce)
cell.info$nGene <- colSums(dgem>0)
head(cell.info)
# Default embedding (e.g. t-SNE or PCA coordinates)
data(tSNE_expr)
default.tsne <- tSNE_expr
default.tne.name <- "t-SNE on full expression matrix"
head(default.tsne)
```
## Create loom file
Minimum information required to create the .loom object:
```{r createLoom}
library(SCopeLoomR)
### Create the minimal loom file
file.name <- "example.loom"
build_loom(
file.name=file.name,
dgem=dgem,
title="Fake expression dataset for examples",
genome="Mouse", # Just for user information, not used internally
default.embedding=default.tsne,
default.embedding.name=default.tne.name
)
#
```
## Add optional information
To add any following information to a loom, please run the following command before
```{r}
loom <- open_loom(file.path = file.name, mode = "r+")
```
### Hierarchy
You can organize/group your .loom files in SCope by specifiying differents grouping levels. The current .loom file will be put in `Mouse -> Toy Datasets` branch of the SCope loom tree.
```{r addHierarchy}
add_hierarchy(
loom = loom,
hierarchy = create_hierarchy(
level.1.name = "Mouse",
level.2.name = "Toy Datasets",
level.3.name = ""
)
)
```
The same command can be used to update the hierarchy of a .loom file (set `overwrite=TRUE`):
```{r, eval=FALSE}
add_hierarchy(
loom = loom,
hierarchy = create_hierarchy(
level.1.name = "[level-1-name]",
level.2.name = "[level-2-name]",
level.3.name = "[level-3-name]"
),
overwrite = T
)
```
### Annotations/metrics
Annotations and/or metrics can be added to query in [SCope](http://scope.aertslab.org).
```{r addAnnot}
# Add annotation (categorical variable)
add_col_attr(
loom=loom,
key = "Cell type",
value=cell.info$cellType,
as.annotation=T
)
# Add metric (numerical variable)
add_col_attr(
loom=loom,
key = "Age",
value=sample(0:20, nrow(cell.info), replace=T),
as.metric = T
)
```
### SCENIC results
```{r addSCENIC, eval=TRUE, warning=FALSE}
scenic.dir <- file.path(system.file('extdata', package='SCopeLoomR'), "SCENIC_fakeOutput/") # Modify for your analysis
# Regulon activity (AUC matrix)
library(AUCell)
regulonsAUC <- readRDS(file.path(scenic.dir, "int/3.4_regulonAUC.Rds"))
add_scenic_regulons_auc_matrix(loom=loom, regulons.AUC=getAUC(regulonsAUC))
# Regulons (gene list), regulon thresholds (optional) and regulon motifs (optional)
regulons <- readRDS(file.path(scenic.dir, "int/3.1_regulons_forAUCell.Rds"))
aucThresholds <- readRDS(file.path(scenic.dir, "int/3.5_AUCellThresholds.Rds"))
regulon.enrichment.table <- NULL # readRDS(file.path(scenic.dir, "int/2.3_motifEnrichment.Rds"))
add_scenic_regulons(loom=loom
, regulons=regulons
, regulon.threshold.assignments=aucThresholds# $threshold.assignment # Optional
, regulon.enrichment.table = regulon.enrichment.table) # Optional
# Alternative t-SNE
tSNE <- readRDS(file.path(scenic.dir, "int/tSNE_AUC_50pcs_50perpl.Rds"))
add_embedding(loom=loom, embedding=tSNE$Y, name="SCENIC (t-SNE on AUC)")
```
### Seurat results
*(Not run in this example. You would need to use your own file names)*
```{r addSeuratEmbedding, eval=FALSE}
seurat.dir <- "Seurat_output/"
seurat.tsne <- readRDS(file = paste0(seurat.dir, "seurat_tsne.rds.gz"))
seurat <- readRDS(paste0(seurat.dir, "seuratObject.rds.gz"))
# Add extra embeddings
add_embedding(loom=loom, embedding=seurat.tsne, name="Seurat 82PC, 30perp"))
```
#### Clusterings
```{r addSeuratClusters, eval=FALSE}
add_seurat_clustering(loom = loom
, seurat = seurat)
```
##### Adding clustering(s) along with annotation for a given resolution (default one if set)
```{r addSeuratClusters2, eval=FALSE}
seurat.annotation<-read.table(file = paste0(seuratDir, "Res2_Clusters.tsv", header=T, quote = '', sep = "\t", stringsAsFactors=F))
add_seurat_clustering(loom = loom
, seurat = seurat
, default.clustering.resolution = "res.2"
, annotation = seurat.annotation
, annotation.cluster.id.cn = "res.2"
, annotation.cluster.description.cn = "Annotation")
```
##### Adding clustering(s) along with cluster markers
```{r addSeuratClusters3, eval=FALSE}
seurat.resolutions <- get_seurat_clustering_resolutions(seurat = seurat)
seurat.markers.file.path.list<-do.call(what = "c", lapply(X=seurat.resolutions, FUN=function(resolution) {
l<-list()
l[paste0("res.",resolution)]<-paste0(seuratDir, "res.",resolution,"/Seurat_PC82_res",resolution,"_bimod_Cluster_Markers.rds.gz")
return (l)
}))
add_seurat_clustering(loom = loom
, seurat = seurat
, seurat.markers.file.path.list = seurat.markers.file.path.list)
```
##### Adding clustering(s), along with cluster markers and metrics (e.g.: logFC, p-value, ...)
```{r addSeuratClusters4, eval=FALSE}
add_seurat_clustering(loom = loom
, seurat = seurat
, seurat.markers.file.path.list = seurat.markers.file.path.list
, seurat.marker.metric.accessors = c("avg_logFC", "p_val_adj")
, seurat.marker.metric.names = c("Avg. logFC", "Adj. p-value")
, seurat.marker.metric.description = c("Average log fold change from Wilcox differential test (cfr. Seurat)"
, "Adjusted p-value using Bonferroni correction based on the total number of genes in the dataset (cfr. Seurat)"))
```
### Trajectory inference methods results
*(Not run in this example. You would need to use your own file names)*
**Monocle**: Adding the trajectory data from a CellDataSet object generated by monocle.
```{r addMonocleTrajectory, eval=FALSE}
# Add monocle embedding and trajectory
S_matrix <- reducedDimS(cds = cds)
monocle.embedding<-data.frame(t(x = S_matrix[c(1, 2), ]))
add_embedding(loom = loom
, embedding = monocle.embedding
, name = "Monocle (DDRTree)"
, trajectory = create_trajectory_from_monocle(cds = cds))
```
All trajectories methods (> 59) implemented in the **dyno** framework can be added to .loom files and subsequently be displayed in SCope. To see how to run these, please visit https://github.com/dynverse/dyno.
```{r}
add_embedding_from_dyno(loom = loom
, dyno = model
, name = "PAGA (dyno)")
```
## Finalize (save)
```{r finalize}
finalize(loom=loom)
```
# Read data from a loom object
## Get the gene expression matrix
```{r openLoom}
loom_path <- file.path(system.file('extdata', package='SCopeLoomR'), "example.loom")
loom <- open_loom(loom_path, mode="r+")
dgem <- get_dgem(loom)
close_loom(loom)
dgem[1:5,1:5]
```
## Get the gene expression matrix of a given cluster
This .loom file can be downloaded at http://scope.aertslab.org in the left panel under `Drosophila` > `Brain`.
```{r dgemCluster, eval=FALSE}
loom <- open_loom(loom = "Aerts_Fly_AdultBrain_Filtered_57k.loom", mode="r")
cluster10_dgem <- get_cluster_dgem_by_name(loom = loom, cluster.name = "Astrocyte-like - Cluster 10")
close_loom(loom)
```
## Get annotations from a clustering
```{r}
# Get annotations from clustering (default is set to Annotation)
clustering_annotations_df <- get_clustering_annotations(loom = loom)
# Check available clusterings
list_clusterings_names()
# Get annotations from clustering with given name (here: [custom-clustering-name])
clustering_annotations_df <- get_clustering_annotations(
loom = loom,
clustering_name = "[custom-clustering-name]",
)
# Get annotations from clustering with given name (here: [custom-clustering-name]) annotated by person associated with the given ORCID.
clustering_annotations_df <- get_clustering_annotations(
loom = loom,
clustering_name = "[custom-clustering-name]",
curator_orcid = "XXX-XXX-XXX-XXX"
)
# Get annotations from clustering with given name (here: [custom-clustering-name]) annotated by person associated with the given ORCID.
# Save the resulting output (data.frame) to CSV file (here: ./test.csv)
clustering_annotations_df <- get_clustering_annotations(
loom = loom,
clustering_name = "[custom-clustering-name]",
curator_orcid = "XXX-XXX-XXX-XXX",
out_file_path = "./tmp.csv"
)
```
## Get all newly added annotations by ORCID
```{r}
# Get all annotations across all clusterings that were added after 2019-01-01 and from the user with the given XXX-XXX-XXX-XXX ORCID
all_clustering_annotations_df <- get_all_clustering_annotations(
loom = loom,
curator_orcid = "XXX-XXX-XXX-XXX",
from_date = "2019-01-01",
)
```
## Other general accessors
```{r loomSCENIC, fig.height=4, fig.width=4}
loomPath <- file.path(system.file('extdata', package='SCopeLoomR'), "example.loom")
loom <- open_loom(loomPath, mode="r")
# SCENIC results:
regulons_incidMat <- get_regulons(loom)
regulonsAUC <- get_regulonsAuc(loom)
regulonsAucThresholds <- get_regulonThresholds(loom)
embeddings <- get_embeddings(loom)
close_loom(loom)
# Use the loaded info... e.g.:
plot(embeddings[[1]], pch=16)
```
Don't forget...
```{r}
close_loom(loom)
# or finalize(loom)
```