Motif showed on Featureplot is not matched with the cluster that i did analysis with #1457

Yow98 · 2023-07-26T01:17:17Z

Yow98
Jul 26, 2023

The thing is i wanted to do analysis with my cluster 8 and it is the nearest one with cluster 0 so i did include cluster 8 as ident.1 and cluster 0 as ident.2 for FindMarkers function. However, when i took the highest expressed motif on cluster 8, it didnt show the expression on cluster 8 but only on other clusters, so i dont know whats wrong with my analysis .
Below is my analysis with merging 3 groups ( control, tbi and treatment). Can you also check for me whether the process for merging is correct ?

#ATAC-seq merging
library(Signac)
library(Seurat)
library(GenomicRanges)
library(EnsDb.Mmusculus.v79)
set.seed(1549)

#Common feature
gr <- GRanges(seqnames = "chr1", ranges = IRanges(start = c(20, 70, 300), end = c(120, 200, 400)))
gr

Create peaks

peaks.control <- read.table(
file = "D:/Hieu_atac/snATAC-seq/NGS1090763/Sham/outs/peaks.bed",
col.names = c("chr", "start", "end")
)

peaks.tbi <- read.table(
file = "D:/Hieu_atac/snATAC-seq/NGS1090763/TBI/outs/filtered_peak_bc_matrix/peaks.bed",
col.names = c("chr", "start", "end")
)

peaks.tbiDHEA <- read.table(
file = "D:/Hieu_atac/snATAC-seq/NGS1090763/TBI_DHEA/outs/filtered_peak_bc_matrix/peaks.bed",
col.names = c("chr", "start", "end")
)

convert to genomic ranges

gr.control <- makeGRangesFromDataFrame(peaks.control)
gr.tbi <- makeGRangesFromDataFrame(peaks.tbi)
gr.tbiDHEA <- makeGRangesFromDataFrame(peaks.tbiDHEA)

Create a unified set of peaks to quantify in each dataset

combined.peaks <- GenomicRanges::reduce(x = c(gr.control, gr.tbi, gr.tbiDHEA))

peakwidths <- width(combined.peaks)
combined.peaks <- combined.peaks[peakwidths < 10000 & peakwidths > 20]
combined.peaks

load metadata

md.control <- read.table(
file = "D:/Hieu_atac/snATAC-seq/NGS1090763/Sham/outs/singlecell.csv",
stringsAsFactors = FALSE,
sep = ",",
header = TRUE,
row.names = 1
)[-1, ]

md.tbi <- read.table(
file = "D:/Hieu_atac/snATAC-seq/NGS1090763/TBI/outs/singlecell.csv",
stringsAsFactors = FALSE,
sep = ",",
header = TRUE,
row.names = 1
)[-1, ]

md.tbiDHEA <- read.table(
file = "D:/Hieu_atac/snATAC-seq/NGS1090763/TBI_DHEA/outs/singlecell.csv",
stringsAsFactors = FALSE,
sep = ",",
header = TRUE,
row.names = 1
)[-1, ]

perform an initial filtering of low count cells

md.control <- md.control[md.control$passed_filters > 500, ]
md.tbi <- md.tbi[md.tbi$passed_filters > 500, ]
md.tbiDHEA <- md.tbiDHEA[md.tbiDHEA$passed_filters > 500, ]

create fragment objects

frags.control <- CreateFragmentObject(
path = "D:/Hieu_atac/snATAC-seq/NGS1090763/ATAC_Sham_TBI_Fragment/sham_fragments.tsv.gz",
cells = rownames(md.control)
)

frags.tbi <- CreateFragmentObject(
path = "D:/Hieu_atac/snATAC-seq/NGS1090763/TBI/outs/fragments.tsv.gz",
cells = rownames(md.tbi)
)

frags.tbiDHEA <- CreateFragmentObject(
path = "D:/Hieu_atac/snATAC-seq/NGS1090763/TBI_DHEA/outs/fragments.tsv.gz",
cells = rownames(md.tbiDHEA)
)

##create count matrix

control.counts <- FeatureMatrix(
fragments = frags.control,
features = combined.peaks,
cells = rownames(md.control)
)

tbi.counts <- FeatureMatrix(
fragments = frags.tbi,
features = combined.peaks,
cells = rownames(md.tbi)
)

tbiDHEA.counts <- FeatureMatrix(
fragments = frags.tbiDHEA,
features = combined.peaks,
cells = rownames(md.tbiDHEA)
)

##Create assay
control_assay <- CreateChromatinAssay(control.counts, fragments = frags.control,genome = "mm10")
control <- CreateSeuratObject(control_assay, assay = "ATAC", meta.data=md.control)

tbi_assay <- CreateChromatinAssay(tbi.counts, fragments = frags.tbi,genome = "mm10")
tbi <- CreateSeuratObject(tbi_assay, assay = "ATAC", meta.data=md.tbi)

tbiDHEA_assay <- CreateChromatinAssay(tbiDHEA.counts, fragments = frags.tbiDHEA,genome = "mm10")
tbiDHEA <- CreateSeuratObject(tbiDHEA_assay, assay = "ATAC", meta.data=md.tbiDHEA)

control$group <- 'control'
tbi$group <- 'tbi'
tbiDHEA$group <- 'tbiDHEA'

combined.atac <- merge(
x = control,
y = list(tbi,tbiDHEA),
add.cell.ids = c("control", "tbi", "tbiDHEA")
)
dim(combined.atac)

combined.atac[["ATAC"]]

extract gene annotations from EnsDb

annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Mmusculus.v79)

change to UCSC style since the data was mapped to mm10

seqlevels(annotations) <- paste0('chr', seqlevels(annotations))
genome(annotations) <- "mm10"

add the gene information to the object

Annotation(combined.atac) <- annotations

combined.atac <- NucleosomeSignal(object = combined.atac)
combined.atac <- TSSEnrichment(combined.atac, fast = FALSE)
combined.atac$high.tss <- ifelse(combined.atac$TSS.enrichment > 2, 'High', 'Low')

combined.atac$pct_reads_in_peaks <- combined.atac$peak_region_fragments / combined.atac$passed_filters * 100
combined.atac$blacklist_ratio <- combined.atac$blacklist_region_fragments / combined.atac$peak_region_fragments

VlnPlot(
object = combined.atac,
features = c('pct_reads_in_peaks', 'peak_region_fragments',
'TSS.enrichment', 'blacklist_ratio', 'nucleosome_signal'),
pt.size = 0.1,
ncol = 5,
split.by = 'group'
)

FeatureScatter(combined.atac, feature1 = "nCount_ATAC", feature2 = "TSS.enrichment")
FeatureScatter(combined.atac, feature1 = "nCount_ATAC", feature2 = "nucleosome_signal")

combined.atac <- subset(
x = combined.atac,
subset = peak_region_fragments > 1000 &
peak_region_fragments < 40000 &
pct_reads_in_peaks > 20 &
blacklist_ratio < 0.05 &
nucleosome_signal < 4 &
TSS.enrichment > 2
)

dim(combined.atac)

combined.atac <- RunTFIDF(combined.atac)
combined.atac <- FindTopFeatures(combined.atac, min.cutoff = "q0")
combined.atac <- RunSVD(combined.atac)
DepthCor(combined.atac)
combined.atac <- RunUMAP(
object = combined.atac,
reduction = 'lsi',
dims = 2:15
)
combined.atac <- FindNeighbors(
object = combined.atac,
reduction = 'lsi',
dims = 2:15
)
combined.atac <- FindClusters(
object = combined.atac,
algorithm = 3,
resolution = 0.5,
verbose = FALSE
)

DimPlot(object = combined.atac, label = TRUE)

##MOTIF
library(JASPAR2022)
library(TFBSTools)
library(motifmatchr)
library(BSgenome.Mmusculus.UCSC.mm10)
library(patchwork)
library(cicero)

pfm <- getMatrixSet(
x = JASPAR2022,
opts = list(collection = "CORE", tax_group = 'vertebrates', all_versions = FALSE)
)

combined.atac <- AddMotifs(
object = combined.atac,
genome = BSgenome.Mmusculus.UCSC.mm10,
pfm = pfm
)

motif.matrix <- CreateMotifMatrix(
features = granges(combined.atac),
pwm = pfm,
genome = 'mm10'
)

motif <- CreateMotifObject(
data = motif.matrix,
pwm = pfm
)

FindMarkers between 8 and 0
da_peaks <- FindMarkers(
object = combined.atac,
ident.1 = '8',
ident.2 = '0',
only.pos = TRUE,
test.use = 'LR',
min.pct = 0.05,
latent.vars = 'nCount_ATAC'
)

get top differentially accessible peaks

top.da.peak <- rownames(da_peaks[da_peaks$p_val < 0.005, ])

find peaks open in cluster 8

open.peaks <- AccessiblePeaks(combined.atac, idents = c("8"))

match the overall GC content in the peak set

meta.feature <- GetAssayData(combined.atac, assay = "ATAC", slot = "meta.features")
peaks.matched <- MatchRegionStats(
meta.feature = meta.feature[open.peaks, ],
query.feature = meta.feature[top.da.peak, ],
n = 50000
)

enriched.motifs <- FindMotifs(
object = combined.atac,
features = top.da.peak,
background=peaks.matched
)

MotifPlot(
object = combined.atac,
motifs = head(rownames(enriched.motifs))
)

#SessionInfo
R version 4.2.2 (2022-10-31 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 18363)

Matrix products: default

locale:
[1] LC_COLLATE=Chinese (Traditional)_Taiwan.utf8 LC_CTYPE=Chinese (Traditional)_Taiwan.utf8
[3] LC_MONETARY=Chinese (Traditional)_Taiwan.utf8 LC_NUMERIC=C
[5] LC_TIME=Chinese (Traditional)_Taiwan.utf8

attached base packages:
[1] grid splines stats4 stats graphics grDevices utils datasets methods base

other attached packages:
[1] BSgenome.Mmusculus.UCSC.mm10_1.4.3 BSgenome_1.66.3
[3] rtracklayer_1.58.0 Biostrings_2.66.0
[5] XVector_0.38.0 TFBSTools_1.36.0
[7] motifmatchr_1.20.0 JASPAR2022_0.99.7
[9] BiocFileCache_2.6.1 dbplyr_2.3.3
[11] SCopeLoomR_0.13.0 SingleCellExperiment_1.20.1
[13] SummarizedExperiment_1.28.0 GenomicRanges_1.50.2
[15] GenomeInfoDb_1.34.9 MatrixGenerics_1.10.0
[17] matrixStats_0.63.0 SCENIC_1.1.2-01
[19] magrittr_2.0.3 lubridate_1.9.2
[21] forcats_1.0.0 stringr_1.5.0
[23] purrr_1.0.1 readr_2.1.4
[25] tibble_3.1.8 tidyverse_2.0.0
[27] RColorBrewer_1.1-3 monocle_2.26.0
[29] DDRTree_0.1.5 irlba_2.3.5.1
[31] VGAM_1.1-8 GSVA_1.46.0
[33] data.table_1.14.8 org.Mm.eg.db_3.16.0
[35] AnnotationDbi_1.60.2 IRanges_2.32.0
[37] S4Vectors_0.36.1 clusterProfiler_4.6.2
[39] EnhancedVolcano_1.16.0 ggrepel_0.9.2
[41] tidyr_1.3.0 KernSmooth_2.23-20
[43] fields_14.1 viridis_0.6.3
[45] viridisLite_0.4.2 spam_2.9-1
[47] DoubletFinder_2.0.3 SeuratObject_4.1.3
[49] Seurat_4.3.0 CommPath_1.0.0
[51] igraph_1.3.5 Matrix_1.6-0
[53] ggplot2_3.4.2 dplyr_1.1.0
[55] plyr_1.8.8 reshape2_1.4.4
[57] circlize_0.4.15 Signac_1.9.0
[59] Biobase_2.58.0 BiocGenerics_0.44.0

loaded via a namespace (and not attached):
[1] KEGGREST_1.38.0 locfit_1.5-9.7 slam_0.1-50
[4] remotes_2.4.2 lattice_0.20-45 spatstat.utils_3.0-2
[7] vctrs_0.5.2 utf8_1.2.3 blob_1.2.4
[10] R.oo_1.25.0 withr_2.5.0 foreign_0.8-83
[13] ggnetwork_0.5.12 registry_0.5-1 readxl_1.4.2
[16] lifecycle_1.0.3 cellranger_1.1.0 munsell_0.5.0
[19] ScaledMatrix_1.6.0 ggalluvial_0.12.5 codetools_0.2-18
[22] lmtest_0.9-40 limma_3.54.2 annotate_1.76.0
[25] parallelly_1.36.0 fs_1.6.2 fastmatch_1.1-3
[28] basilisk_1.11.3 metapod_1.6.0 Rtsne_0.16
[31] stringi_1.7.12 RcppRoll_0.3.0 qlcMatrix_0.9.7
[34] sctransform_0.3.5 polyclip_1.10-4 yulab.utils_0.0.6
[37] rhdf5filters_1.10.1 goftest_1.2-3 patchwork_1.1.2
[40] cluster_2.1.4 ggraph_2.1.0 TFMPvalue_0.0.9
[43] ape_5.7-1 pkgconfig_2.0.3 pheatmap_1.0.12
[46] prettyunits_1.1.1 sparseMatrixStats_1.10.0 ggridges_0.5.4
[49] timechange_0.2.0 httr_1.4.6 liana_0.1.12
[52] treeio_1.25.0.001 progress_1.2.2 GetoptLong_1.0.5
[55] beachmat_2.14.2 graphlayouts_0.8.4 basilisk.utils_1.11.2
[58] ggfun_0.1.1 gson_0.1.0 snow_0.4-4
[61] htmltools_0.5.5 miniUI_0.1.1.1 usethis_2.2.2
[64] fastICA_1.2-3 NMF_0.26 yaml_2.3.7
[67] leidenbase_0.1.14 pillar_1.9.0 later_1.3.0
[70] fitdistrplus_1.1-11 glue_1.6.2 DBI_1.1.3
[73] BiocParallel_1.32.5 foreach_1.5.2 enrichR_3.2
[76] dotCall64_1.0-2 gtable_0.3.3 GOSemSim_2.24.0
[79] rsvd_1.0.5 caTools_1.18.2 GlobalOptions_0.1.2
[82] fastmap_1.1.0 broom_1.0.5 checkmate_2.1.0
[85] promises_1.2.0.1 FNN_1.1.3.2 ggforce_0.4.1
[88] hms_1.1.3 png_0.1-8 clue_0.3-64
[91] glmGamPoi_1.10.2 ggtree_3.9.0 spatstat.explore_3.1-0
[94] lazyeval_0.2.2 Formula_1.2-5 profvis_0.3.8
[97] chromVAR_1.20.2 crayon_1.5.2 gridBase_0.4-7
[100] svglite_2.1.1 tidyselect_1.2.0 xfun_0.39
[103] ks_1.14.0 BiocSingular_1.14.0 AUCell_1.20.2
[106] survival_3.4-0 rappdirs_0.3.3 bit64_4.0.5
[109] rngtools_1.5.2 CNEr_1.34.0 combinat_0.0-8
[112] ggsignif_0.6.4 R.methodsS3_1.8.2 htmlTable_2.4.1
[115] xtable_1.8-4 DT_0.28 renv_1.0.0
[118] cachem_1.0.6 DelayedArray_0.24.0 abind_1.4-5
[121] systemfonts_1.0.4 mime_0.12 nabor_0.5.0
[124] rjson_0.2.21 aplot_0.1.10 rstatix_0.7.2
[127] sparsesvd_0.2-2 processx_3.8.1 Nebulosa_1.8.0
[130] spatstat.sparse_3.0-1 tools_4.2.2 cli_3.6.0
[133] logger_0.2.2 decoupleR_2.5.0 proxy_0.4-27
[136] future.apply_1.11.0 ggplotify_0.1.1 DelayedMatrixStats_1.20.0
[139] ggbeeswarm_0.7.2 qvalue_2.30.0 fgsea_1.24.0
[142] sna_2.7-1 HDF5Array_1.26.0 ica_1.0-3
[145] pbapply_1.7-2 ggrastr_1.0.2 plotly_4.10.2
[148] ggpubr_0.6.0 scuttle_1.8.4 R.utils_2.12.2
[151] tweenr_2.0.2 HSMMSingleCell_1.18.0 zlibbioc_1.44.0
[154] devtools_2.4.5 restfulr_0.0.15 shadowtext_0.1.2
[157] tzdb_0.3.0 ps_1.7.5 fansi_1.0.4
[160] tidygraph_1.2.3 GSEABase_1.60.0 tensor_1.5
[163] ROCR_1.0-11 backports_1.4.1 farver_2.1.1
[166] bit_4.0.5 Rsamtools_2.14.0 RANN_2.6.1
[169] CellChat_1.6.1 shiny_1.7.4.1 BiocIO_1.8.0
[172] scattermore_0.8 DOSE_3.24.2 scatterpie_0.2.1
[175] RcppAnnoy_0.0.20 maps_3.4.1 downloader_0.4
[178] rstudioapi_0.15.0 iterators_1.0.14 Rhdf5lib_1.20.0
[181] spatstat.geom_3.1-0 nlme_3.1-160 DirichletMultinomial_1.40.0
[184] gtools_3.9.4 shape_1.4.6 beeswarm_0.4.0
[187] network_1.18.1 listenv_0.9.0 rhdf5_2.42.1
[190] generics_0.1.3 colorspace_2.1-0 base64enc_0.1-3
[193] XML_3.99-0.13 pkgbuild_1.4.2 spatstat.data_3.0-1
[196] sp_1.6-0 dqrng_0.3.0 GenomeInfoDbData_1.2.9
[199] R.cache_0.16.0 progressr_0.13.0 evaluate_0.21
[202] memoise_2.0.1 coda_0.19-4 ComplexHeatmap_2.15.4
[205] knitr_1.43 doParallel_1.0.17 vipor_0.4.5
[208] httpuv_1.6.8 Rcpp_1.0.10 BiocManager_1.30.21
[211] seqLogo_1.64.0 pkgload_1.3.2.1 jsonlite_1.8.4
[214] Hmisc_5.1-0 SCpubr_1.1.2.9000 RSpectra_0.16-1
[217] dir.expiry_1.6.0 digest_0.6.31 poweRlaw_0.70.6
[220] OmnipathR_3.7.10 rprojroot_2.0.3 cowplot_1.1.1
[223] bitops_1.0-7 RSQLite_2.2.20 rmarkdown_2.23
[226] globals_0.16.2 compiler_4.2.2 nnet_7.3-18
[229] reticulate_1.28 statmod_1.5.0 scran_1.26.2
[232] zoo_1.8-11 carData_3.0-5 pracma_2.4.2
[235] gridGraphics_0.5-1 rlang_1.1.0 urlchecker_1.0.1
[238] uwot_0.1.14 sessioninfo_1.2.2 ggseqlogo_0.1
[241] rvest_1.0.3 future_1.33.0 mvtnorm_1.2-2
[244] htmlwidgets_1.6.2 hdf5r_1.3.8 WriteXLS_6.4.0
[247] labeling_0.4.2 callr_3.7.3 leiden_0.4.3
[250] curl_5.0.0 scater_1.26.1 parallel_4.2.2
[253] BiocNeighbors_1.16.0 edgeR_3.40.2 filelock_1.0.2
[256] scales_1.2.1 desc_1.4.2 graph_1.76.0
[259] enrichplot_1.18.4 HDO.db_0.99.1 deldir_1.0-6
[262] gridExtra_2.3 bluster_1.8.0 RCurl_1.98-1.10
[265] car_3.1-2 docopt_0.7.1 GO.db_3.16.0
[268] MASS_7.3-58.1 ellipsis_0.3.2 tidytree_0.4.2
[271] spatstat.random_3.1-4 xml2_1.3.3 rpart_4.1.19
[274] R6_2.5.1 mclust_6.0.0 statnet.common_4.9.0
[277] GenomicAlignments_1.34.1

Yow98 · 2023-07-26T06:06:46Z

Yow98
Jul 26, 2023
Author

Hi @timoast , can you explain why this problem happened ? Thank you

0 replies

timoast · 2023-07-27T10:29:18Z

timoast
Jul 27, 2023
Maintainer

It's not clear how you're quantifying motif 'expression'. Are you running chromVAR or you have multiome data and are looking at expression of the corresponding TF gene, or something else?

3 replies

Yow98 Jul 28, 2023
Author

@timoast Hi, Thank you for your response
This is how i quantified motif expression after finding peaks. I did follow the same steps from your motif analysis vignette. And the code is below :
combined.atac <- RunChromVAR(
object = combined.atac,
genome = BSgenome.Mmusculus.UCSC.mm10
)
DefaultAssay(combined.atac) <- 'chromvar'

look at the activity

p2 <- FeaturePlot(
object = combined.atac,
features = "MA0665.1",
min.cutoff = 'q10',
max.cutoff = 'q90',
pt.size = 0.1,
)

I have already run chromVar and changed the default assay into chromvar before visualizing the interested motif on featureplot.
I dont have multiome data. All i have right now are two seperate groups of datasets. One is RNA seq and one is ATAC seq.

timoast Jul 28, 2023
Maintainer

Not sure what the issue is

timoast Jul 28, 2023
Maintainer

Not sure what the issue is

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Motif showed on Featureplot is not matched with the cluster that i did analysis with #1457

{{title}}

{{editor}}'s edit

{{editor}}'s edit

Replies: 2 comments 3 replies

{{title}}

{{editor}}'s edit

{{editor}}'s edit

{{title}}

{{title}}

{{title}}

{{title}}

Select a reply

Motif showed on Featureplot is not matched with the cluster that i did analysis with #1457

Yow98 Jul 26, 2023

Create peaks

convert to genomic ranges

Create a unified set of peaks to quantify in each dataset

load metadata

perform an initial filtering of low count cells

create fragment objects

extract gene annotations from EnsDb

change to UCSC style since the data was mapped to mm10

add the gene information to the object

get top differentially accessible peaks

find peaks open in cluster 8

match the overall GC content in the peak set

Replies: 2 comments · 3 replies

Yow98 Jul 26, 2023 Author

timoast Jul 27, 2023 Maintainer

Yow98 Jul 28, 2023 Author

look at the activity

timoast Jul 28, 2023 Maintainer

timoast Jul 28, 2023 Maintainer

Yow98
Jul 26, 2023

Replies: 2 comments 3 replies

Yow98
Jul 26, 2023
Author

timoast
Jul 27, 2023
Maintainer

Yow98 Jul 28, 2023
Author

timoast Jul 28, 2023
Maintainer

timoast Jul 28, 2023
Maintainer