Add SPACEMARKERS_MQC and SPACEMARKERS_IMSCORES processes and wire them into
the COSPACE workflow. SPACEMARKERS now runs as an Rscript-shebang script and
its container is pinned by digest. SPACEMARKERS_MQC tolerates results in which
no pattern pair has interacting genes.

diff --git a/main.nf b/main.nf
index 5d6b165..8748c08 100644
--- a/main.nf
+++ b/main.nf
@@ -1,8 +1,10 @@
 nextflow.enable.dsl=2
 
-include { PREPROCESS } from './modules/local/preprocess'
-include { SPACEMARKERS } from './modules/local/spacemarkers'
-include { COGAPS } from './modules/local/cogaps'
+include { PREPROCESS } from './modules/local/preprocess.nf'
+include { COGAPS } from './modules/local/cogaps.nf'
+include { SPACEMARKERS;
+          SPACEMARKERS_MQC;
+          SPACEMARKERS_IMSCORES } from './modules/local/spacemarkers.nf'
 
 workflow COSPACE {
 
@@ -20,6 +22,12 @@ workflow COSPACE {
 
     SPACEMARKERS(ch_spacemarkers)
 
+    ch_spacemarkers_mqc = SPACEMARKERS.out.spaceMarkers.map { tuple(it[0], it[1]) }
+    SPACEMARKERS_MQC(ch_spacemarkers_mqc)
+
+    ch_spacemarkers_imscores = SPACEMARKERS.out.spaceMarkers.map { tuple(it[0], it[1]) }
+    SPACEMARKERS_IMSCORES(ch_spacemarkers_imscores)
+
     emit:
         dgCMatrix = PREPROCESS.out.dgCMatrix
         cogapsResult = COGAPS.out.cogapsResult
@@ -27,7 +35,8 @@ workflow COSPACE {
         optParams = SPACEMARKERS.out.optParams
         spaceMarkers = SPACEMARKERS.out.spaceMarkers
         versions = SPACEMARKERS.out.versions
-
+        spacemarkers_mqc = SPACEMARKERS_MQC.out.spacemarkers_mqc
+        spacemarkers_imscores = SPACEMARKERS_IMSCORES.out.spacemarkers_imscores
 }
 
 workflow {
diff --git a/modules/local/spacemarkers.nf b/modules/local/spacemarkers.nf
index 31c9de1..9263423 100644
--- a/modules/local/spacemarkers.nf
+++ b/modules/local/spacemarkers.nf
@@ -1,7 +1,7 @@
 process SPACEMARKERS {
     tag "$meta.id"
     label 'process_high_memory'
-    container 'ghcr.io/fertiglab/spacemarkers:1.1.2.3'
+    container 'ghcr.io/deshpandelab/spacemarkers@sha256:6357fa4be041ef321ac983df45485c8cb25197ff787110f7562ec10116a39692'
 
     input:
     tuple val(meta), path(cogapsResult), path(data)
@@ -24,45 +24,218 @@ process SPACEMARKERS {
         SpaceMarkers: \$(Rscript -e 'print(packageVersion("SpaceMarkers"))' | awk '{print \$2}')
         R: \$(Rscript -e 'print(packageVersion("base"))' | awk '{print \$2}')
     END_VERSIONS
-    """
+    """
+
     script:
+    def args = task.ext.args ?: ''
+    prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    #!/usr/bin/env Rscript
+    dir.create("${prefix}", showWarnings = FALSE)
+    library("SpaceMarkers")
+
+    #load spatial coords from tissue positions, deconvolved patterns, and expression
+    coords <- load10XCoords("$data")
+    features <- getSpatialFeatures("$cogapsResult")
+    dataMatrix <- load10XExpr("$data")
+
+    #add spatial coordinates to deconvolved data, only use barcodes present in data
+    spPatterns <- merge(coords, features, by.x = "barcode", by.y = "row.names")
+    spPatterns <- spPatterns[which(spPatterns[,"barcode"] %in% colnames(dataMatrix)),]
+    saveRDS(spPatterns, file = "${prefix}/spPatterns.rds")
+
+    #remove genes with low expression, only barcodes present in spatial data
+    keepGenes <- which(apply(dataMatrix, 1, sum) > 10)
+    keepBarcodes <- which(colnames(dataMatrix) %in% spPatterns[,"barcode"])
+    dataMatrix <- dataMatrix[keepGenes, keepBarcodes]
+
+    #compute optimal parameters for spatial patterns
+    optParams <- getSpatialParameters(spPatterns);
+    saveRDS(optParams, file = "${prefix}/optParams.rds")
+
+    #find genes that are differentially expressed in spatial patterns
+    spaceMarkers <- getPairwiseInteractingGenes(data = dataMatrix,
+                                                optParams = optParams,
+                                                spPatterns = spPatterns,
+                                                mode = "DE",
+                                                analysis="enrichment")
+
+    saveRDS(spaceMarkers, file = "${prefix}/spaceMarkers.rds")
+
+    # Get the versions of the packages
+    spaceMarkersVersion <- packageVersion("SpaceMarkers")
+    rVersion <- packageVersion("base")
+    cat(sprintf('"%s":\n SpaceMarkers: %s\n R: %s\n',
+        "${task.process}", spaceMarkersVersion, rVersion),
+        file = "versions.yml")
+    """
+}
+
+process SPACEMARKERS_MQC {
+    tag "$meta.id"
+    label 'process_low'
+    container 'ghcr.io/deshpandelab/spacemarkers@sha256:6357fa4be041ef321ac983df45485c8cb25197ff787110f7562ec10116a39692'
+
+    input:
+    tuple val(meta), path(spaceMarkers)
+    output:
+    tuple val(meta), path("${prefix}/spacemarkers_mqc.json"), emit: spacemarkers_mqc
+    path "versions.yml", emit: versions
+    script:
+    def args = task.ext.args ?: ''
+    prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    #!/usr/bin/env Rscript
+    dir.create("${prefix}", showWarnings = FALSE)
+
+    #[['']] notation needed to allow nextflow var substitution
+
+    sm <- readRDS("$spaceMarkers")
+    has_genes <- vapply(sm, function(x) {
+        length(x[['interacting_genes']]) > 0
+    }, logical(1))
+    smi <- sm[has_genes]
+
+    #interacting patterns stats
+    n_pairs_total <- length(sm)
+    n_pairs_interact <- length(smi)
+
+    #defaults used when no pattern pair has interacting genes, so the
+    #report is still written instead of failing inside max()/min()
+    metric_range <- 'NA'
+    gene_range <- 'NA'
+    avg_hotspot_area <- NA_real_
+
+    if (n_pairs_interact > 0) {
+        #range of the SpaceMarkers metric across interacting pairs
+        pair_metric_max <- vapply(smi, function(x) {
+            max(x[['interacting_genes']][[1]][['SpaceMarkersMetric']])
+        }, numeric(1))
+        pair_metric_min <- vapply(smi, function(x) {
+            min(x[['interacting_genes']][[1]][['SpaceMarkersMetric']])
+        }, numeric(1))
+        metric_range <- sprintf('%0.1f - %0.1f',
+                                min(pair_metric_min), max(pair_metric_max))
+
+        #smallest and largest number of genes reported for a pair
+        pair_genes <- vapply(smi, function(x) {
+            nrow(x[['interacting_genes']][[1]])
+        }, integer(1))
+        gene_range <- sprintf('%0.f - %0.f', min(pair_genes), max(pair_genes))
+
+        #mean over pairs of (non-NA hotspot entries / number of hotspot rows)
+        avg_hotspot_area <- mean(vapply(smi, function(x) {
+            sum(!is.na(x[['hotspots']]))/length(x[['hotspots']][,1])
+        }, numeric(1)))
+    }
+
+    #report
+    report_data <- list(
+        "${prefix}" = list(
+            'Pairs Total' = n_pairs_total,
+            'Pairs Interact' = n_pairs_interact,
+            'SpaceMarker Metric' = metric_range,
+            'Gene Count' = gene_range,
+            'Mean Hotspot Area' = avg_hotspot_area
+        )
+    )
+
+    report <- list(
+        id = "spacemarkers_mqc",
+        section_name = "SpaceMarkers",
+        description = "Tool to identify genes associated with latent space interactions in spatial transcriptomics.",
+        plot_type = "table",
+        pconfig = list(
+            id = "custom_data_table",
+            title = "SpaceMarkers Stats"
+        ),
+        data = report_data
+    )
+    jsonlite::write_json(
+        x=report,
+        path = "${prefix}/spacemarkers_mqc.json",
+        auto_unbox = TRUE,
+        pretty = TRUE)
+
+    # Get the versions of the packages
+    spaceMarkersVersion <- packageVersion("SpaceMarkers")
+    rVersion <- packageVersion("base")
+    cat(sprintf('"%s":\n SpaceMarkers: %s\n R: %s\n',
+        "${task.process}", spaceMarkersVersion, rVersion),
+        file = "versions.yml")
+    """
+
+    stub:
     def args = task.ext.args ?: ''
     prefix = task.ext.prefix ?: "${meta.id}"
     """
     mkdir "${prefix}"
-    Rscript -e 'library("SpaceMarkers");
-    #load spatial coords from tissue positions, deconvolved patterns, and expression
-    coords <- load10XCoords("$data")
-    features <- getSpatialFeatures("$cogapsResult")
-    dataMatrix <- load10XExpr("$data")
-
-    #add spatial coordinates to deconvolved data, only use barcodes present in data
-    spPatterns <- merge(coords, features, by.x = "barcode", by.y = "row.names")
-    spPatterns <- spPatterns[which(spPatterns[,"barcode"] %in% colnames(dataMatrix)),]
-    saveRDS(spPatterns, file = "${prefix}/spPatterns.rds");
-
-    #remove genes with low expression, only barcodes present in spatial data
-    keepGenes <- which(apply(dataMatrix, 1, sum) > 10)
-    keepBarcodes <- which(colnames(dataMatrix) %in% spPatterns[,"barcode"])
-    dataMatrix <- dataMatrix[keepGenes, keepBarcodes]
-
-    #compute optimal parameters for spatial patterns
-    optParams <- getSpatialParameters(spPatterns);
-    saveRDS(optParams, file = "${prefix}/optParams.rds");
-
-    #find genes that are differentially expressed in spatial patterns
-    spaceMarkers <- getPairwiseInteractingGenes(data = dataMatrix, \
-                                                optParams = optParams, \
-                                                spPatterns = spPatterns, \
-                                                mode = "DE", \
-                                                analysis="enrichment");
-
-    saveRDS(spaceMarkers, file = "${prefix}/spaceMarkers.rds");
-    '
+    touch "${prefix}/spacemarkers_mqc.json"
     cat <<-END_VERSIONS > versions.yml
-    "${task.process}":
-        SpaceMarkers: \$(Rscript -e 'print(packageVersion("SpaceMarkers"))' | awk '{print \$2}')
-        R: \$(Rscript -e 'print(packageVersion("base"))' | awk '{print \$2}')
+    "${task.process}":
+        SpaceMarkers: \$(Rscript -e 'print(packageVersion("SpaceMarkers"))' | awk '{print \$2}')
+        R: \$(Rscript -e 'print(packageVersion("base"))' | awk '{print \$2}')
+    END_VERSIONS
+    """
+}
+
+process SPACEMARKERS_IMSCORES {
+    tag "$meta.id"
+    label 'process_low'
+    container 'ghcr.io/deshpandelab/spacemarkers@sha256:6357fa4be041ef321ac983df45485c8cb25197ff787110f7562ec10116a39692'
+
+    input:
+    tuple val(meta), path(spaceMarkers)
+    output:
+    tuple val(meta), path("${prefix}/imscores.csv"), emit: spacemarkers_imscores
+    path "versions.yml", emit: versions
+
+    script:
+    def args = task.ext.args ?: ''
+    prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    #!/usr/bin/env Rscript
+    dir.create("${prefix}", showWarnings = FALSE)
+
+    sm <- readRDS("$spaceMarkers")
+    smi <- sm[which(sapply(sm, function(x) length(x[['interacting_genes']]))>0)]
+
+    fields <- c('Gene', 'SpaceMarkersMetric')
+
+    imscores <- lapply(seq_along(smi), function(x) {
+        df <- smi[[x]][['interacting_genes']][[1]][,fields]
+        #rename to metric to its parent item name
+        setNames(df, c('Gene', names(smi)[x]))
+    })
+
+    imscores <- Reduce(function(x, y) {
+        merge(x, y, by="Gene", all=TRUE)
+    }, x=imscores, right=FALSE)
+
+    if(is.null(imscores)) {
+        imscores <- data.frame(Gene=character(0))
+    }
+
+    write.csv(imscores, file = "${prefix}/imscores.csv", row.names = FALSE)
+
+    # Get the versions of the packages
+    spaceMarkersVersion <- packageVersion("SpaceMarkers")
+    rVersion <- packageVersion("base")
+    cat(sprintf('"%s":\n SpaceMarkers: %s\n R: %s\n',
+        "${task.process}", spaceMarkersVersion, rVersion),
+        file = "versions.yml")
+    """
+
+    stub:
+    def args = task.ext.args ?: ''
+    prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    mkdir "${prefix}"
+    touch "${prefix}/imscores.csv"
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        SpaceMarkers: \$(Rscript -e 'print(packageVersion("SpaceMarkers"))' | awk '{print \$2}')
+        R: \$(Rscript -e 'print(packageVersion("base"))' | awk '{print \$2}')
     END_VERSIONS
     """
 }