From ebc48690b397ac3f594ef83541444a63aadc2da6 Mon Sep 17 00:00:00 2001 From: Ally Hawkins Date: Tue, 14 Jan 2025 13:08:10 -0600 Subject: [PATCH 01/25] add scripts used for assigning cell types --- .../usr/bin/assign-consensus-label.R | 108 ++++++++++++++++++ .../resources/usr/bin/save-celltypes.R | 77 +++++++++++++ 2 files changed, 185 insertions(+) create mode 100644 modules/cell-type-consensus/resources/usr/bin/assign-consensus-label.R create mode 100644 modules/cell-type-consensus/resources/usr/bin/save-celltypes.R diff --git a/modules/cell-type-consensus/resources/usr/bin/assign-consensus-label.R b/modules/cell-type-consensus/resources/usr/bin/assign-consensus-label.R new file mode 100644 index 0000000..937e4c2 --- /dev/null +++ b/modules/cell-type-consensus/resources/usr/bin/assign-consensus-label.R @@ -0,0 +1,108 @@ +#!/usr/bin/env Rscript + +# This script is used to combine all TSV files containing cell types into a single TSV file +# The output TSV file will include the following added columns: +# panglao_ontology: CL term assigned to panglao term +# panglao_annotation: human readable value associated with the CL term for panglao term +# blueprint_annotation_cl: human readable value associated with the CL term for singler_celltype_ontology +# consensus_annotation: human readable name associated with the consensus label +# consensus_ontology: CL ontology term for the consensus cell type + +library(optparse) + +option_list <- list( + make_option( + opt_str = c("--celltype_tsv_dir"), + type = "character", + help = "Path to directory containing TSV files with cell type annotations from single samples. + All TSV files in this directory will be combined into a single file." + ), + make_option( + opt_str = c("--panglao_ref_file"), + type = "character", + help = "Path to file with panglao assignments and associated cell ontology ids" + ), + make_option( + opt_str = c("--consensus_ref_file"), + type = "character", + help = "Path to file containing the reference for assigning consensus cell type labels" + ), + make_option( + opt_str = c("--output_file"), + type = "character", + help = "Path to file where combined TSV file will be saved. + File name must end in either `.tsv` or `.tsv.gz` to save a compressed TSV file" + ) +) + +# Parse options +opt <- parse_args(OptionParser(option_list = option_list)) + +# Prep ref files --------------------------------------------------------------- + +# make sure reference files exist +stopifnot( + "panglao reference file does not exist" = file.exists(opt$panglao_ref_file), + "cell type consensus reference file does not exist" = file.exists(opt$consensus_ref_file), + "output file must end in `.tsv` or `.tsv.gz`" = stringr::str_detect(opt$output_file, ".tsv|.tsv.gz") +) + +# read in ref files +# change names for panglao ref to match what's in the consensus file +panglao_ref_df <- readr::read_tsv(opt$panglao_ref_file) |> + dplyr::rename( + panglao_ontology = ontology_id, + panglao_annotation = human_readable_value, + original_panglao_name = panglao_cell_type + ) + +consensus_ref_df <- readr::read_tsv(opt$consensus_ref_file) |> + # select columns to use for joining and consensus assigmments + dplyr::select( + panglao_ontology, + original_panglao_name, + blueprint_ontology, + consensus_annotation, + consensus_ontology + ) + +# grab singler ref from celldex +blueprint_ref <- celldex::BlueprintEncodeData() + +# grab obo file, we need this to map the ontologies from blueprint +cl_ont <- ontologyIndex::get_ontology("http://purl.obolibrary.org/obo/cl/releases/2024-09-26/cl-basic.obo") + + +# get ontologies and human readable name into data frame for blueprint +# in scpca-nf we don't include the cl name so this lets us add it in +blueprint_df <- data.frame( + blueprint_ontology = blueprint_ref$label.ont, + blueprint_annotation_cl = cl_ont$name[blueprint_ref$label.ont] +) |> + unique() |> + tidyr::drop_na() + +# get list of all TSV files +all_files <- list.files(path = opt$celltype_tsv_dir, + pattern = "*.tsv", + full.names = TRUE) + +# read in TSV files and combine into a single df +all_cells_df <- all_files |> + purrr::map(readr::read_tsv) |> + dplyr::bind_rows() |> + # add columns for panglao ontology and consensus + # first add panglao ontology + dplyr::left_join(panglao_ref_df, by = c("cellassign_celltype_annotation" = "original_panglao_name")) |> + # now add in all the blueprint columns + dplyr::left_join(blueprint_df, by = c("singler_celltype_ontology" = "blueprint_ontology")) |> + # then add consensus labels + dplyr::left_join(consensus_ref_df, + by = c("singler_celltype_ontology" = "blueprint_ontology", + "cellassign_celltype_annotation" = "original_panglao_name", + "panglao_ontology")) |> + # use unknown for NA annotation but keep ontology ID as NA + dplyr::mutate(consensus_annotation = dplyr::if_else(is.na(consensus_annotation), "Unknown", consensus_annotation)) + +# export file +readr::write_tsv(all_cells_df, opt$output_file) diff --git a/modules/cell-type-consensus/resources/usr/bin/save-celltypes.R b/modules/cell-type-consensus/resources/usr/bin/save-celltypes.R new file mode 100644 index 0000000..91081c8 --- /dev/null +++ b/modules/cell-type-consensus/resources/usr/bin/save-celltypes.R @@ -0,0 +1,77 @@ +#!/usr/bin/env Rscript + +# This script is used to grab the cell type annotations from the +# colData from a SCE object and save them to a TSV file + +library(optparse) + +option_list <- list( + make_option( + opt_str = c("--sce_file"), + type = "character", + help = "Path to RDS file containing a processed SingleCellExperiment object from scpca-nf" + ), + make_option( + opt_str = c("--output_file"), + type = "character", + help = "Path to file where colData will be saved, must end in `.tsv`" + ) +) + +# Parse options +opt <- parse_args(OptionParser(option_list = option_list)) + +# Set up ----------------------------------------------------------------------- + +# make sure input files exist +stopifnot( + "sce file does not exist" = file.exists(opt$sce_file) +) + +# load SCE +suppressPackageStartupMessages({ + library(SingleCellExperiment) +}) + +# Extract colData -------------------------------------------------------------- + +# read in sce +sce <- readr::read_rds(opt$sce_file) + +# extract ids +library_id <- metadata(sce)$library_id +# account for multiplexed libraries that have multiple samples +# for now just combine sample ids into a single string and don't worry about demultiplexing +sample_id <- metadata(sce)$sample_id |> + paste0(collapse = ";") +project_id <- metadata(sce)$project_id + +# check if cell line since cell lines don't have any cell type assignments +# account for having more than one sample and a list of sample types +# all sample types should be the same theoretically +is_cell_line <- all(metadata(sce)$sample_type == "cell line") + +# only create and write table for non-cell line samples +if(!is_cell_line){ + + # get df with ids, barcodes, and cell type assignments + celltype_df <- colData(sce) |> + as.data.frame() |> + dplyr::mutate( + project_id = project_id, + sample_id = sample_id, + library_id = library_id + ) |> + dplyr::select( + project_id, + sample_id, + library_id, + barcodes, + contains("celltype") # get both singler and cellassign with ontology + ) + + # save tsv + readr::write_tsv(celltype_df, opt$output_file) + +} + From 6db6af4c2b12ec04eab784e050fe8aaf1ba9c268 Mon Sep 17 00:00:00 2001 From: Ally Hawkins Date: Tue, 14 Jan 2025 13:08:18 -0600 Subject: [PATCH 02/25] initiate readme for module --- modules/cell-type-consensus/README.md | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 modules/cell-type-consensus/README.md diff --git a/modules/cell-type-consensus/README.md b/modules/cell-type-consensus/README.md new file mode 100644 index 0000000..66a7419 --- /dev/null +++ b/modules/cell-type-consensus/README.md @@ -0,0 +1,8 @@ +This module assigns a consensus cell type based on cell types assigned by `SingleR` and `CellAssign`. + +Scripts are derived from the the `cell-type-consensus` module of the [OpenScPCA-analysis](https://github.com/AlexsLemonade/OpenScPCA-analysis) repository. + +Links to specific original files used in this module: + +- `save-celltypes.R`: +- `assign-consensus-label.R`: From 2e4f68ca632ef2a8ac7e3f5259f50a3107a93800 Mon Sep 17 00:00:00 2001 From: Ally Hawkins Date: Tue, 14 Jan 2025 14:29:14 -0600 Subject: [PATCH 03/25] make scripts executable --- .../usr/bin/assign-consensus-label.R | 74 ++++++++++--------- .../resources/usr/bin/save-celltypes.R | 37 +++++----- 2 files changed, 56 insertions(+), 55 deletions(-) mode change 100644 => 100755 modules/cell-type-consensus/resources/usr/bin/assign-consensus-label.R mode change 100644 => 100755 modules/cell-type-consensus/resources/usr/bin/save-celltypes.R diff --git a/modules/cell-type-consensus/resources/usr/bin/assign-consensus-label.R b/modules/cell-type-consensus/resources/usr/bin/assign-consensus-label.R old mode 100644 new mode 100755 index 937e4c2..37e0280 --- a/modules/cell-type-consensus/resources/usr/bin/assign-consensus-label.R +++ b/modules/cell-type-consensus/resources/usr/bin/assign-consensus-label.R @@ -1,36 +1,36 @@ #!/usr/bin/env Rscript -# This script is used to combine all TSV files containing cell types into a single TSV file -# The output TSV file will include the following added columns: +# This script is used to combine all TSV files containing cell types into a single TSV file +# The output TSV file will include the following added columns: # panglao_ontology: CL term assigned to panglao term -# panglao_annotation: human readable value associated with the CL term for panglao term -# blueprint_annotation_cl: human readable value associated with the CL term for singler_celltype_ontology -# consensus_annotation: human readable name associated with the consensus label -# consensus_ontology: CL ontology term for the consensus cell type +# panglao_annotation: human readable value associated with the CL term for panglao term +# blueprint_annotation_cl: human readable value associated with the CL term for singler_celltype_ontology +# consensus_annotation: human readable name associated with the consensus label +# consensus_ontology: CL ontology term for the consensus cell type library(optparse) option_list <- list( make_option( - opt_str = c("--celltype_tsv_dir"), + opt_str = c("--input_tsv_files"), type = "character", - help = "Path to directory containing TSV files with cell type annotations from single samples. - All TSV files in this directory will be combined into a single file." + help = "Comma separated list of input file paths corresponding to the TSV files with cell type annotations. + All TSV files in this list will be combined into a single file." ), make_option( opt_str = c("--panglao_ref_file"), - type = "character", + type = "character", help = "Path to file with panglao assignments and associated cell ontology ids" ), make_option( opt_str = c("--consensus_ref_file"), type = "character", help = "Path to file containing the reference for assigning consensus cell type labels" - ), + ), make_option( opt_str = c("--output_file"), type = "character", - help = "Path to file where combined TSV file will be saved. + help = "Path to file where combined TSV file will be saved. File name must end in either `.tsv` or `.tsv.gz` to save a compressed TSV file" ) ) @@ -40,15 +40,19 @@ opt <- parse_args(OptionParser(option_list = option_list)) # Prep ref files --------------------------------------------------------------- -# make sure reference files exist +# make sure reference files exist stopifnot( + "List of input files containing cell type assignments is missing" = !is.null(opt$input_tsv_files), "panglao reference file does not exist" = file.exists(opt$panglao_ref_file), "cell type consensus reference file does not exist" = file.exists(opt$consensus_ref_file), "output file must end in `.tsv` or `.tsv.gz`" = stringr::str_detect(opt$output_file, ".tsv|.tsv.gz") ) -# read in ref files -# change names for panglao ref to match what's in the consensus file +# list of paths to tsv files +all_files <- unlist(stringr::str_split(opt$input_tsv_files, ",")) + +# read in ref files +# change names for panglao ref to match what's in the consensus file panglao_ref_df <- readr::read_tsv(opt$panglao_ref_file) |> dplyr::rename( panglao_ontology = ontology_id, @@ -57,9 +61,9 @@ panglao_ref_df <- readr::read_tsv(opt$panglao_ref_file) |> ) consensus_ref_df <- readr::read_tsv(opt$consensus_ref_file) |> - # select columns to use for joining and consensus assigmments + # select columns to use for joining and consensus assigmments dplyr::select( - panglao_ontology, + panglao_ontology, original_panglao_name, blueprint_ontology, consensus_annotation, @@ -70,39 +74,39 @@ consensus_ref_df <- readr::read_tsv(opt$consensus_ref_file) |> blueprint_ref <- celldex::BlueprintEncodeData() # grab obo file, we need this to map the ontologies from blueprint -cl_ont <- ontologyIndex::get_ontology("http://purl.obolibrary.org/obo/cl/releases/2024-09-26/cl-basic.obo") +cl_ont <- ontologyIndex::get_ontology("http://purl.obolibrary.org/obo/cl/releases/2024-09-26/cl-basic.obo") # get ontologies and human readable name into data frame for blueprint -# in scpca-nf we don't include the cl name so this lets us add it in +# in scpca-nf we don't include the cl name so this lets us add it in blueprint_df <- data.frame( blueprint_ontology = blueprint_ref$label.ont, blueprint_annotation_cl = cl_ont$name[blueprint_ref$label.ont] ) |> - unique() |> + unique() |> tidyr::drop_na() -# get list of all TSV files -all_files <- list.files(path = opt$celltype_tsv_dir, - pattern = "*.tsv", - full.names = TRUE) +# Create combined TSV ---------------------------------------------------------- -# read in TSV files and combine into a single df -all_cells_df <- all_files |> - purrr::map(readr::read_tsv) |> - dplyr::bind_rows() |> +# read in TSV files and combine into a single df +all_cells_df <- all_files |> + purrr::map(readr::read_tsv) |> + dplyr::bind_rows() |> # add columns for panglao ontology and consensus - # first add panglao ontology + # first add panglao ontology dplyr::left_join(panglao_ref_df, by = c("cellassign_celltype_annotation" = "original_panglao_name")) |> # now add in all the blueprint columns - dplyr::left_join(blueprint_df, by = c("singler_celltype_ontology" = "blueprint_ontology")) |> + dplyr::left_join(blueprint_df, by = c("singler_celltype_ontology" = "blueprint_ontology")) |> # then add consensus labels - dplyr::left_join(consensus_ref_df, - by = c("singler_celltype_ontology" = "blueprint_ontology", - "cellassign_celltype_annotation" = "original_panglao_name", - "panglao_ontology")) |> + dplyr::left_join(consensus_ref_df, + by = c( + "singler_celltype_ontology" = "blueprint_ontology", + "cellassign_celltype_annotation" = "original_panglao_name", + "panglao_ontology" + ) + ) |> # use unknown for NA annotation but keep ontology ID as NA dplyr::mutate(consensus_annotation = dplyr::if_else(is.na(consensus_annotation), "Unknown", consensus_annotation)) -# export file +# export file readr::write_tsv(all_cells_df, opt$output_file) diff --git a/modules/cell-type-consensus/resources/usr/bin/save-celltypes.R b/modules/cell-type-consensus/resources/usr/bin/save-celltypes.R old mode 100644 new mode 100755 index 91081c8..7dff090 --- a/modules/cell-type-consensus/resources/usr/bin/save-celltypes.R +++ b/modules/cell-type-consensus/resources/usr/bin/save-celltypes.R @@ -1,13 +1,13 @@ #!/usr/bin/env Rscript -# This script is used to grab the cell type annotations from the +# This script is used to grab the cell type annotations from the # colData from a SCE object and save them to a TSV file library(optparse) option_list <- list( make_option( - opt_str = c("--sce_file"), + opt_str = c("--input_sce_file"), type = "character", help = "Path to RDS file containing a processed SingleCellExperiment object from scpca-nf" ), @@ -35,43 +35,40 @@ suppressPackageStartupMessages({ # Extract colData -------------------------------------------------------------- -# read in sce +# read in sce sce <- readr::read_rds(opt$sce_file) -# extract ids +# extract ids library_id <- metadata(sce)$library_id -# account for multiplexed libraries that have multiple samples -# for now just combine sample ids into a single string and don't worry about demultiplexing -sample_id <- metadata(sce)$sample_id |> +# account for multiplexed libraries that have multiple samples +# for now just combine sample ids into a single string and don't worry about demultiplexing +sample_id <- metadata(sce)$sample_id |> paste0(collapse = ";") project_id <- metadata(sce)$project_id -# check if cell line since cell lines don't have any cell type assignments +# check if cell line since cell lines don't have any cell type assignments # account for having more than one sample and a list of sample types -# all sample types should be the same theoretically +# all sample types should be the same theoretically is_cell_line <- all(metadata(sce)$sample_type == "cell line") # only create and write table for non-cell line samples -if(!is_cell_line){ - +if (!is_cell_line) { # get df with ids, barcodes, and cell type assignments - celltype_df <- colData(sce) |> - as.data.frame() |> + celltype_df <- colData(sce) |> + as.data.frame() |> dplyr::mutate( project_id = project_id, sample_id = sample_id, library_id = library_id - ) |> + ) |> dplyr::select( project_id, sample_id, library_id, - barcodes, - contains("celltype") # get both singler and cellassign with ontology + barcodes, + contains("celltype") # get both singler and cellassign with ontology ) - - # save tsv + + # save tsv readr::write_tsv(celltype_df, opt$output_file) - } - From 925c41516da2b31c6ebb9ff3a6f7efea22fa7575 Mon Sep 17 00:00:00 2001 From: Ally Hawkins Date: Tue, 14 Jan 2025 14:59:57 -0600 Subject: [PATCH 04/25] add to main workflow --- main.nf | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/main.nf b/main.nf index ca58951..a408c0d 100644 --- a/main.nf +++ b/main.nf @@ -6,6 +6,7 @@ include { simulate_sce } from './modules/simulate-sce' include { merge_sce } from './modules/merge-sce' include { detect_doublets } from './modules/doublet-detection' include { seurat_conversion } from './modules/seurat-conversion' +include { cell_type_consensus } from './modules/cell-type-consensus' // **** Parameter checks **** param_error = false @@ -57,4 +58,7 @@ workflow { // Run the seurat conversion workflow seurat_conversion(sample_ch) + + // Run the consensus cell type workflow + cell_type_consensus(sample_ch) } From b0762df73b6270a25420b314458e9b353c9642ff Mon Sep 17 00:00:00 2001 From: Ally Hawkins Date: Tue, 14 Jan 2025 15:00:20 -0600 Subject: [PATCH 05/25] workflow for running consensus cell types --- modules/cell-type-consensus/main.nf | 97 +++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 modules/cell-type-consensus/main.nf diff --git a/modules/cell-type-consensus/main.nf b/modules/cell-type-consensus/main.nf new file mode 100644 index 0000000..765ccdd --- /dev/null +++ b/modules/cell-type-consensus/main.nf @@ -0,0 +1,97 @@ +#!/usr/bin/env nextflow + +// Workflow to assign consensus cell type labels + +// module parameters +params.panglao_ref_file = file('https://github.com/AlexsLemonade/OpenScPCA-analysis/blob/40d6db1bcd2e4bdca8d840e96ebae8fe19db5372/analyses/cell-type-consensus/references/panglao-cell-type-ontologies.tsv') +params.consensus_ref_file = file('https://github.com/AlexsLemonade/OpenScPCA-analysis/blob/40d6db1bcd2e4bdca8d840e96ebae8fe19db5372/analyses/cell-type-consensus/references/consensus-cell-type-reference.tsv') + +process save_celltypes { + container params.consensus_cell_type_container + tag "${sample_id}" + input: + tuple val(sample_id), + val(project_id), + path(library_files) + output: + tuple val(project_id), + path(output_files) + script: + output_files = library_files + .collect{ + it.name.replaceAll(/(?i).rds$/, "__original-cell-types.tsv") + } + """ + for file in ${library_files}; do + save-celltypes.R \ + --input_sce_file \$file \ + --output_file \$(basename \${file%.rds}__original-cell-types.tsv) + done + """ + + stub: + output_files = library_files + .collect{ + it.name.replaceAll(/(?i).rds$/, "_original-cell-types.tsv") + } + """ + for file in ${library_files}; do + touch \$(basename \${file%.rds}_original-cell-types.tsv) + done + """ +} + +process assign_consensus { + container params.consensus_cell_type_container + tag "${project_id}" + label 'mem_8' + publishDir "${params.results_bucket}/${params.release_prefix}/cell-type-consensus", mode: 'copy' + input: + tuple val(project_id), + path(cell_type_files) + output: + path consensus_output_file + script: + input_files = cell_type_files.join(',') + consensus_output_file = "${project_id}_consensus-cell-types.tsv.gz" + """ + assign-consensus-label.R \ + --input_tsv_files ${input_files} \ + --panglao_ref_file ${params.panglao_ref_file} \ + --consensus_ref_file ${consensus_ref_file} \ + --output_file ${consensus_output_file} + """ + + stub: + input_files = cell_type_files.join(',') + consensus_output_file = "${project_id}_consensus-cell-types.tsv.gz" + """ + touch ${consensus_output_file} + """ +} + + + +workflow cell_type_consensus { + take: + sample_ch // [sample_id, project_id, sample_path] + main: + // create [sample_id, project_id, [list of processed files]] + libraries_ch = sample_ch + .map{sample_id, project_id, sample_path -> + def library_files = Utils.getLibraryFiles(sample_path, format: "sce", process_level: "processed") + return [sample_id, project_id, library_files] + } + + // save cell type information for each library + save_celltypes(libraries_ch) + + cell_type_files_ch = save_celltypes.out + .groupTuple(by: 0) // group by project id + + // assign consensus cell types by project + assign_consensus(cell_type_files_ch) + + emit: + assign_consensus.out // [project_id, consensus_output_file] +} From e9e00edc29afb1678aaf54683e16013a162b67d2 Mon Sep 17 00:00:00 2001 From: Ally Hawkins Date: Tue, 14 Jan 2025 15:00:39 -0600 Subject: [PATCH 06/25] consensus cell type container --- config/containers.config | 2 ++ 1 file changed, 2 insertions(+) diff --git a/config/containers.config b/config/containers.config index aafd69c..9483bef 100644 --- a/config/containers.config +++ b/config/containers.config @@ -18,4 +18,6 @@ params{ // seurat-conversion module seurat_conversion_container = 'public.ecr.aws/openscpca/seurat-conversion:v0.2.0' + // cell-type-consensus module + consensus_cell_type_container = 'public.ecr.aws/openscpca/cell-type-consensus:latest' } From 823a959371fd3944fa19a0913f66b73e001a8c9f Mon Sep 17 00:00:00 2001 From: Ally Hawkins Date: Tue, 14 Jan 2025 15:04:00 -0600 Subject: [PATCH 07/25] use original script names --- modules/cell-type-consensus/main.nf | 8 ++++---- ...assign-consensus-label.R => combine-celltype-tables.R} | 0 .../usr/bin/{save-celltypes.R => save-coldata.R} | 0 3 files changed, 4 insertions(+), 4 deletions(-) rename modules/cell-type-consensus/resources/usr/bin/{assign-consensus-label.R => combine-celltype-tables.R} (100%) rename modules/cell-type-consensus/resources/usr/bin/{save-celltypes.R => save-coldata.R} (100%) diff --git a/modules/cell-type-consensus/main.nf b/modules/cell-type-consensus/main.nf index 765ccdd..3be7858 100644 --- a/modules/cell-type-consensus/main.nf +++ b/modules/cell-type-consensus/main.nf @@ -3,8 +3,8 @@ // Workflow to assign consensus cell type labels // module parameters -params.panglao_ref_file = file('https://github.com/AlexsLemonade/OpenScPCA-analysis/blob/40d6db1bcd2e4bdca8d840e96ebae8fe19db5372/analyses/cell-type-consensus/references/panglao-cell-type-ontologies.tsv') -params.consensus_ref_file = file('https://github.com/AlexsLemonade/OpenScPCA-analysis/blob/40d6db1bcd2e4bdca8d840e96ebae8fe19db5372/analyses/cell-type-consensus/references/consensus-cell-type-reference.tsv') +params.panglao_ref_file = file('https://github.com/AlexsLemonade/OpenScPCA-analysis/blob/b870a082bc9acd3536c5f8d2d52550d8fe8a4239/analyses/cell-type-consensus/references/panglao-cell-type-ontologies.tsv') +params.consensus_ref_file = file('https://github.com/AlexsLemonade/OpenScPCA-analysis/blob/b870a082bc9acd3536c5f8d2d52550d8fe8a4239/analyses/cell-type-consensus/references/consensus-cell-type-reference.tsv') process save_celltypes { container params.consensus_cell_type_container @@ -23,7 +23,7 @@ process save_celltypes { } """ for file in ${library_files}; do - save-celltypes.R \ + save-coldata.R \ --input_sce_file \$file \ --output_file \$(basename \${file%.rds}__original-cell-types.tsv) done @@ -55,7 +55,7 @@ process assign_consensus { input_files = cell_type_files.join(',') consensus_output_file = "${project_id}_consensus-cell-types.tsv.gz" """ - assign-consensus-label.R \ + combine-celltype-tables.R \ --input_tsv_files ${input_files} \ --panglao_ref_file ${params.panglao_ref_file} \ --consensus_ref_file ${consensus_ref_file} \ diff --git a/modules/cell-type-consensus/resources/usr/bin/assign-consensus-label.R b/modules/cell-type-consensus/resources/usr/bin/combine-celltype-tables.R similarity index 100% rename from modules/cell-type-consensus/resources/usr/bin/assign-consensus-label.R rename to modules/cell-type-consensus/resources/usr/bin/combine-celltype-tables.R diff --git a/modules/cell-type-consensus/resources/usr/bin/save-celltypes.R b/modules/cell-type-consensus/resources/usr/bin/save-coldata.R similarity index 100% rename from modules/cell-type-consensus/resources/usr/bin/save-celltypes.R rename to modules/cell-type-consensus/resources/usr/bin/save-coldata.R From 154277b4eec8b8b8ef6857af767c9857306612e2 Mon Sep 17 00:00:00 2001 From: Ally Hawkins Date: Tue, 14 Jan 2025 15:04:23 -0600 Subject: [PATCH 08/25] udpate permalinks in readme --- modules/cell-type-consensus/README.md | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/modules/cell-type-consensus/README.md b/modules/cell-type-consensus/README.md index 66a7419..5ba49a3 100644 --- a/modules/cell-type-consensus/README.md +++ b/modules/cell-type-consensus/README.md @@ -1,8 +1,13 @@ -This module assigns a consensus cell type based on cell types assigned by `SingleR` and `CellAssign`. +This module assigns a consensus cell type based on cell types assigned by `SingleR` and `CellAssign`. Scripts are derived from the the `cell-type-consensus` module of the [OpenScPCA-analysis](https://github.com/AlexsLemonade/OpenScPCA-analysis) repository. -Links to specific original files used in this module: +Links to specific original scripts used in this module: -- `save-celltypes.R`: -- `assign-consensus-label.R`: +- `save-coldata.R`: +- `combine-celltype-tables.R`: + +This module also uses the following reference files found in the `OpenScPCA-analysis` repository: + +- `panglao-cell-type-ontologies.tsv`: +- `consensus-cell-type-reference.tsv`: From d3b0ad141028698a8d597c2d163a38acd95a36f8 Mon Sep 17 00:00:00 2001 From: Ally Hawkins Date: Tue, 14 Jan 2025 15:33:24 -0600 Subject: [PATCH 09/25] comment out other modules for faster testing --- main.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/main.nf b/main.nf index a408c0d..be536ab 100644 --- a/main.nf +++ b/main.nf @@ -51,13 +51,13 @@ workflow { .filter{ run_all || it[1] in project_ids } // Run the merge workflow - merge_sce(sample_ch) + //merge_sce(sample_ch) // Run the doublet detection workflow - detect_doublets(sample_ch) + //detect_doublets(sample_ch) // Run the seurat conversion workflow - seurat_conversion(sample_ch) + //seurat_conversion(sample_ch) // Run the consensus cell type workflow cell_type_consensus(sample_ch) From 1f7944cd5d6ba59070ae15ceaeb24e9c90471bca Mon Sep 17 00:00:00 2001 From: Ally Hawkins Date: Tue, 14 Jan 2025 15:37:43 -0600 Subject: [PATCH 10/25] use correct input name --- modules/cell-type-consensus/resources/usr/bin/save-coldata.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/cell-type-consensus/resources/usr/bin/save-coldata.R b/modules/cell-type-consensus/resources/usr/bin/save-coldata.R index 7dff090..70601d9 100755 --- a/modules/cell-type-consensus/resources/usr/bin/save-coldata.R +++ b/modules/cell-type-consensus/resources/usr/bin/save-coldata.R @@ -25,7 +25,7 @@ opt <- parse_args(OptionParser(option_list = option_list)) # make sure input files exist stopifnot( - "sce file does not exist" = file.exists(opt$sce_file) + "sce file does not exist" = file.exists(opt$input_sce_file) ) # load SCE From b3caf00b97feb0a98417c2d678c63354a98d16e4 Mon Sep 17 00:00:00 2001 From: Ally Hawkins Date: Tue, 14 Jan 2025 15:38:23 -0600 Subject: [PATCH 11/25] temporarily terminate if fail --- modules/cell-type-consensus/main.nf | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/cell-type-consensus/main.nf b/modules/cell-type-consensus/main.nf index 3be7858..e89936d 100644 --- a/modules/cell-type-consensus/main.nf +++ b/modules/cell-type-consensus/main.nf @@ -9,6 +9,7 @@ params.consensus_ref_file = file('https://github.com/AlexsLemonade/OpenScPCA-ana process save_celltypes { container params.consensus_cell_type_container tag "${sample_id}" + errorStrategy 'terminate' input: tuple val(sample_id), val(project_id), @@ -43,6 +44,7 @@ process save_celltypes { process assign_consensus { container params.consensus_cell_type_container + errorStrategy 'terminate' tag "${project_id}" label 'mem_8' publishDir "${params.results_bucket}/${params.release_prefix}/cell-type-consensus", mode: 'copy' From bf419b99ef10d29e14bfdcc2ac2e9c9e23c2135b Mon Sep 17 00:00:00 2001 From: Ally Hawkins Date: Tue, 14 Jan 2025 15:47:04 -0600 Subject: [PATCH 12/25] another argument mis named --- modules/cell-type-consensus/resources/usr/bin/save-coldata.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/cell-type-consensus/resources/usr/bin/save-coldata.R b/modules/cell-type-consensus/resources/usr/bin/save-coldata.R index 70601d9..c754977 100755 --- a/modules/cell-type-consensus/resources/usr/bin/save-coldata.R +++ b/modules/cell-type-consensus/resources/usr/bin/save-coldata.R @@ -36,7 +36,7 @@ suppressPackageStartupMessages({ # Extract colData -------------------------------------------------------------- # read in sce -sce <- readr::read_rds(opt$sce_file) +sce <- readr::read_rds(opt$input_sce_file) # extract ids library_id <- metadata(sce)$library_id From baa20a699b301a7d5c1e8768a39f0d490e5f58bf Mon Sep 17 00:00:00 2001 From: Ally Hawkins Date: Tue, 14 Jan 2025 16:05:31 -0600 Subject: [PATCH 13/25] account for empty files because of cell lines --- .../resources/usr/bin/combine-celltype-tables.R | 5 ++++- modules/cell-type-consensus/resources/usr/bin/save-coldata.R | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/modules/cell-type-consensus/resources/usr/bin/combine-celltype-tables.R b/modules/cell-type-consensus/resources/usr/bin/combine-celltype-tables.R index 37e0280..1f3b5ff 100755 --- a/modules/cell-type-consensus/resources/usr/bin/combine-celltype-tables.R +++ b/modules/cell-type-consensus/resources/usr/bin/combine-celltype-tables.R @@ -49,7 +49,10 @@ stopifnot( ) # list of paths to tsv files -all_files <- unlist(stringr::str_split(opt$input_tsv_files, ",")) +input_sce_files <- unlist(stringr::str_split(opt$input_tsv_files, ",")) +# check if any are empty, if so remove them +missing_files <- file.size(input_sce_files) > 0 +all_files <- all_files[!missing_files] # read in ref files # change names for panglao ref to match what's in the consensus file diff --git a/modules/cell-type-consensus/resources/usr/bin/save-coldata.R b/modules/cell-type-consensus/resources/usr/bin/save-coldata.R index c754977..a98d548 100755 --- a/modules/cell-type-consensus/resources/usr/bin/save-coldata.R +++ b/modules/cell-type-consensus/resources/usr/bin/save-coldata.R @@ -52,7 +52,10 @@ project_id <- metadata(sce)$project_id is_cell_line <- all(metadata(sce)$sample_type == "cell line") # only create and write table for non-cell line samples -if (!is_cell_line) { +if (is_cell_line) { + # make an empty filtered file + file.create(opt$output_file) +} else { # get df with ids, barcodes, and cell type assignments celltype_df <- colData(sce) |> as.data.frame() |> From 1bf5b2ea3f0f3acaac6e55dcb19dce3d55c4e82a Mon Sep 17 00:00:00 2001 From: Ally Hawkins Date: Tue, 14 Jan 2025 16:21:53 -0600 Subject: [PATCH 14/25] add missing params --- modules/cell-type-consensus/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/cell-type-consensus/main.nf b/modules/cell-type-consensus/main.nf index e89936d..56aeb01 100644 --- a/modules/cell-type-consensus/main.nf +++ b/modules/cell-type-consensus/main.nf @@ -60,7 +60,7 @@ process assign_consensus { combine-celltype-tables.R \ --input_tsv_files ${input_files} \ --panglao_ref_file ${params.panglao_ref_file} \ - --consensus_ref_file ${consensus_ref_file} \ + --consensus_ref_file ${params.consensus_ref_file} \ --output_file ${consensus_output_file} """ From ce05d63b0d07602ba1432691ad8e6fb57f738e54 Mon Sep 17 00:00:00 2001 From: Ally Hawkins Date: Tue, 14 Jan 2025 16:40:02 -0600 Subject: [PATCH 15/25] account for more than one library per sample --- modules/cell-type-consensus/main.nf | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/modules/cell-type-consensus/main.nf b/modules/cell-type-consensus/main.nf index 56aeb01..c985bc6 100644 --- a/modules/cell-type-consensus/main.nf +++ b/modules/cell-type-consensus/main.nf @@ -20,13 +20,13 @@ process save_celltypes { script: output_files = library_files .collect{ - it.name.replaceAll(/(?i).rds$/, "__original-cell-types.tsv") + it.name.replaceAll(/(?i).rds$/, "_original-cell-types.tsv") } """ for file in ${library_files}; do save-coldata.R \ --input_sce_file \$file \ - --output_file \$(basename \${file%.rds}__original-cell-types.tsv) + --output_file \$(basename \${file%.rds}_original-cell-types.tsv) done """ @@ -90,6 +90,10 @@ workflow cell_type_consensus { cell_type_files_ch = save_celltypes.out .groupTuple(by: 0) // group by project id + .map{ project_id, celltype_files -> tuple( + project_id, + celltype_files.flatten() // get rid of nested tuple that occurs when more than one library maps to a sample + )} // assign consensus cell types by project assign_consensus(cell_type_files_ch) From 4f22c416d2d88ccf218eebd4eab6d27556e7985d Mon Sep 17 00:00:00 2001 From: Ally Hawkins <54039191+allyhawkins@users.noreply.github.com> Date: Wed, 15 Jan 2025 09:31:04 -0600 Subject: [PATCH 16/25] Apply suggestions from code review Co-authored-by: Joshua Shapiro --- modules/cell-type-consensus/main.nf | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/modules/cell-type-consensus/main.nf b/modules/cell-type-consensus/main.nf index c985bc6..a3bcc79 100644 --- a/modules/cell-type-consensus/main.nf +++ b/modules/cell-type-consensus/main.nf @@ -51,6 +51,8 @@ process assign_consensus { input: tuple val(project_id), path(cell_type_files) + path panglao_ref + path consensus_ref output: path consensus_output_file script: @@ -59,8 +61,8 @@ process assign_consensus { """ combine-celltype-tables.R \ --input_tsv_files ${input_files} \ - --panglao_ref_file ${params.panglao_ref_file} \ - --consensus_ref_file ${params.consensus_ref_file} \ + --panglao_ref_file ${panglao_ref} \ + --consensus_ref_file ${consensus_ref} \ --output_file ${consensus_output_file} """ @@ -96,7 +98,7 @@ workflow cell_type_consensus { )} // assign consensus cell types by project - assign_consensus(cell_type_files_ch) + assign_consensus(cell_type_files_ch, file(params.panglao_ref_file), file(params.consensus_ref_file)) emit: assign_consensus.out // [project_id, consensus_output_file] From 7dead7ce0b3f1f319c5f66efe8960f0559dc87c0 Mon Sep 17 00:00:00 2001 From: Ally Hawkins Date: Wed, 15 Jan 2025 09:48:01 -0600 Subject: [PATCH 17/25] fix typo with all files variable --- .../resources/usr/bin/combine-celltype-tables.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/cell-type-consensus/resources/usr/bin/combine-celltype-tables.R b/modules/cell-type-consensus/resources/usr/bin/combine-celltype-tables.R index 1f3b5ff..87dcf86 100755 --- a/modules/cell-type-consensus/resources/usr/bin/combine-celltype-tables.R +++ b/modules/cell-type-consensus/resources/usr/bin/combine-celltype-tables.R @@ -52,7 +52,7 @@ stopifnot( input_sce_files <- unlist(stringr::str_split(opt$input_tsv_files, ",")) # check if any are empty, if so remove them missing_files <- file.size(input_sce_files) > 0 -all_files <- all_files[!missing_files] +all_files <- input_sce_files[!missing_files] # read in ref files # change names for panglao ref to match what's in the consensus file From 636374195342d91b4a5a6e41646ce459e594b2c1 Mon Sep 17 00:00:00 2001 From: Ally Hawkins <54039191+allyhawkins@users.noreply.github.com> Date: Wed, 15 Jan 2025 11:03:21 -0600 Subject: [PATCH 18/25] Apply suggestions from code review Co-authored-by: Joshua Shapiro --- modules/cell-type-consensus/main.nf | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/modules/cell-type-consensus/main.nf b/modules/cell-type-consensus/main.nf index a3bcc79..a11bca4 100644 --- a/modules/cell-type-consensus/main.nf +++ b/modules/cell-type-consensus/main.nf @@ -15,7 +15,8 @@ process save_celltypes { val(project_id), path(library_files) output: - tuple val(project_id), + tuple val(sample_id), + val(project_id), path(output_files) script: output_files = library_files @@ -91,8 +92,8 @@ workflow cell_type_consensus { save_celltypes(libraries_ch) cell_type_files_ch = save_celltypes.out - .groupTuple(by: 0) // group by project id - .map{ project_id, celltype_files -> tuple( + .groupTuple(by: 1) // group by project id + .map{sample_ids, project_id, celltype_files -> tuple( project_id, celltype_files.flatten() // get rid of nested tuple that occurs when more than one library maps to a sample )} From 621960720468272e72f174eab177c92e283941aa Mon Sep 17 00:00:00 2001 From: Ally Hawkins Date: Wed, 15 Jan 2025 11:10:39 -0600 Subject: [PATCH 19/25] use raw github link --- modules/cell-type-consensus/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/cell-type-consensus/main.nf b/modules/cell-type-consensus/main.nf index a11bca4..6c226f0 100644 --- a/modules/cell-type-consensus/main.nf +++ b/modules/cell-type-consensus/main.nf @@ -3,8 +3,8 @@ // Workflow to assign consensus cell type labels // module parameters -params.panglao_ref_file = file('https://github.com/AlexsLemonade/OpenScPCA-analysis/blob/b870a082bc9acd3536c5f8d2d52550d8fe8a4239/analyses/cell-type-consensus/references/panglao-cell-type-ontologies.tsv') -params.consensus_ref_file = file('https://github.com/AlexsLemonade/OpenScPCA-analysis/blob/b870a082bc9acd3536c5f8d2d52550d8fe8a4239/analyses/cell-type-consensus/references/consensus-cell-type-reference.tsv') +params.panglao_ref_file = file('https://raw.githubusercontent.com/AlexsLemonade/OpenScPCA-analysis/blob/b870a082bc9acd3536c5f8d2d52550d8fe8a4239/analyses/cell-type-consensus/references/panglao-cell-type-ontologies.tsv') +params.consensus_ref_file = file('https://raw.githubusercontent.com/AlexsLemonade/OpenScPCA-analysis/b870a082bc9acd3536c5f8d2d52550d8fe8a4239/analyses/cell-type-consensus/references/consensus-cell-type-reference.tsv') process save_celltypes { container params.consensus_cell_type_container From c20291bb5a986f7c14a9585a724ab620a4c73bab Mon Sep 17 00:00:00 2001 From: Ally Hawkins Date: Wed, 15 Jan 2025 11:11:52 -0600 Subject: [PATCH 20/25] fully fix link --- modules/cell-type-consensus/main.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/cell-type-consensus/main.nf b/modules/cell-type-consensus/main.nf index 6c226f0..85ab4da 100644 --- a/modules/cell-type-consensus/main.nf +++ b/modules/cell-type-consensus/main.nf @@ -3,7 +3,7 @@ // Workflow to assign consensus cell type labels // module parameters -params.panglao_ref_file = file('https://raw.githubusercontent.com/AlexsLemonade/OpenScPCA-analysis/blob/b870a082bc9acd3536c5f8d2d52550d8fe8a4239/analyses/cell-type-consensus/references/panglao-cell-type-ontologies.tsv') +params.panglao_ref_file = file('https://raw.githubusercontent.com/AlexsLemonade/OpenScPCA-analysis/b870a082bc9acd3536c5f8d2d52550d8fe8a4239/analyses/cell-type-consensus/references/panglao-cell-type-ontologies.tsv') params.consensus_ref_file = file('https://raw.githubusercontent.com/AlexsLemonade/OpenScPCA-analysis/b870a082bc9acd3536c5f8d2d52550d8fe8a4239/analyses/cell-type-consensus/references/consensus-cell-type-reference.tsv') process save_celltypes { From 18f227d5d2f32747e9a2cbf9e4bdf7ec05259dd7 Mon Sep 17 00:00:00 2001 From: Ally Hawkins Date: Wed, 15 Jan 2025 11:17:42 -0600 Subject: [PATCH 21/25] add module params config --- config/module_params.config | 14 ++++++++++++++ modules/cell-type-consensus/main.nf | 4 ---- modules/merge-sce/main.nf | 5 ----- nextflow.config | 3 +++ 4 files changed, 17 insertions(+), 9 deletions(-) create mode 100644 config/module_params.config diff --git a/config/module_params.config b/config/module_params.config new file mode 100644 index 0000000..458ff0d --- /dev/null +++ b/config/module_params.config @@ -0,0 +1,14 @@ +// Module specific parameters are stored here defined here as parameters +params{ + + // merge sce parameters + reuse_merge = false + max_merge_libraries = 75 // maximum number of libraries to merge (current number is a guess, based on 59 working, but 104 not) + num_hvg = 2000 // number of HVGs to select + + + // cell type consensus + panglao_ref_file = 'https://raw.githubusercontent.com/AlexsLemonade/OpenScPCA-analysis/b870a082bc9acd3536c5f8d2d52550d8fe8a4239/analyses/cell-type-consensus/references/panglao-cell-type-ontologies.tsv' + consensus_ref_file = 'https://raw.githubusercontent.com/AlexsLemonade/OpenScPCA-analysis/b870a082bc9acd3536c5f8d2d52550d8fe8a4239/analyses/cell-type-consensus/references/consensus-cell-type-reference.tsv' + +} diff --git a/modules/cell-type-consensus/main.nf b/modules/cell-type-consensus/main.nf index 85ab4da..54e9afc 100644 --- a/modules/cell-type-consensus/main.nf +++ b/modules/cell-type-consensus/main.nf @@ -2,10 +2,6 @@ // Workflow to assign consensus cell type labels -// module parameters -params.panglao_ref_file = file('https://raw.githubusercontent.com/AlexsLemonade/OpenScPCA-analysis/b870a082bc9acd3536c5f8d2d52550d8fe8a4239/analyses/cell-type-consensus/references/panglao-cell-type-ontologies.tsv') -params.consensus_ref_file = file('https://raw.githubusercontent.com/AlexsLemonade/OpenScPCA-analysis/b870a082bc9acd3536c5f8d2d52550d8fe8a4239/analyses/cell-type-consensus/references/consensus-cell-type-reference.tsv') - process save_celltypes { container params.consensus_cell_type_container tag "${sample_id}" diff --git a/modules/merge-sce/main.nf b/modules/merge-sce/main.nf index 968e8cf..21796c3 100644 --- a/modules/merge-sce/main.nf +++ b/modules/merge-sce/main.nf @@ -3,11 +3,6 @@ // Workflow to merge SCE objects into a single object. // This workflow does NOT perform integration, i.e. batch correction. -// module parameters -params.reuse_merge = false -params.max_merge_libraries = 75 // maximum number of libraries to merge (current number is a guess, based on 59 working, but 104 not) -params.num_hvg = 2000 // number of HVGs to select - // merge workflow variables def module_name = "merge-sce" def publish_merge_base = "${params.results_bucket}/${params.release_prefix}/${module_name}" diff --git a/nextflow.config b/nextflow.config index 0aacaec..27caa44 100644 --- a/nextflow.config +++ b/nextflow.config @@ -27,6 +27,9 @@ includeConfig 'config/process_base.config' // Load container definitions includeConfig 'config/containers.config' +// include module specific parameters +includeConfig 'config/module_params.config' + profiles { standard { process { From 08306e9845f568826ae7fd448f8f1f0d01bfdebb Mon Sep 17 00:00:00 2001 From: Ally Hawkins Date: Wed, 15 Jan 2025 11:57:07 -0600 Subject: [PATCH 22/25] switch logical for missing files --- .../resources/usr/bin/combine-celltype-tables.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/cell-type-consensus/resources/usr/bin/combine-celltype-tables.R b/modules/cell-type-consensus/resources/usr/bin/combine-celltype-tables.R index 87dcf86..f562fd6 100755 --- a/modules/cell-type-consensus/resources/usr/bin/combine-celltype-tables.R +++ b/modules/cell-type-consensus/resources/usr/bin/combine-celltype-tables.R @@ -51,7 +51,7 @@ stopifnot( # list of paths to tsv files input_sce_files <- unlist(stringr::str_split(opt$input_tsv_files, ",")) # check if any are empty, if so remove them -missing_files <- file.size(input_sce_files) > 0 +missing_files <- file.size(input_sce_files) == 0 all_files <- input_sce_files[!missing_files] # read in ref files From 4cf30cb49f58ce6dc05a398f488fb7e4b5ddc20c Mon Sep 17 00:00:00 2001 From: Ally Hawkins Date: Wed, 15 Jan 2025 12:35:06 -0600 Subject: [PATCH 23/25] account for entire projects with cell lines --- .../usr/bin/combine-celltype-tables.R | 50 +++++++++++-------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/modules/cell-type-consensus/resources/usr/bin/combine-celltype-tables.R b/modules/cell-type-consensus/resources/usr/bin/combine-celltype-tables.R index f562fd6..dc79f5f 100755 --- a/modules/cell-type-consensus/resources/usr/bin/combine-celltype-tables.R +++ b/modules/cell-type-consensus/resources/usr/bin/combine-celltype-tables.R @@ -91,25 +91,31 @@ blueprint_df <- data.frame( # Create combined TSV ---------------------------------------------------------- -# read in TSV files and combine into a single df -all_cells_df <- all_files |> - purrr::map(readr::read_tsv) |> - dplyr::bind_rows() |> - # add columns for panglao ontology and consensus - # first add panglao ontology - dplyr::left_join(panglao_ref_df, by = c("cellassign_celltype_annotation" = "original_panglao_name")) |> - # now add in all the blueprint columns - dplyr::left_join(blueprint_df, by = c("singler_celltype_ontology" = "blueprint_ontology")) |> - # then add consensus labels - dplyr::left_join(consensus_ref_df, - by = c( - "singler_celltype_ontology" = "blueprint_ontology", - "cellassign_celltype_annotation" = "original_panglao_name", - "panglao_ontology" - ) - ) |> - # use unknown for NA annotation but keep ontology ID as NA - dplyr::mutate(consensus_annotation = dplyr::if_else(is.na(consensus_annotation), "Unknown", consensus_annotation)) - -# export file -readr::write_tsv(all_cells_df, opt$output_file) +# account for all samples being cell lines and no cell type annotations being present +if (length(all_files) == 0) { + # make an empty filtered file + file.create(opt$output_file) +} else { + # read in TSV files and combine into a single df + all_cells_df <- all_files |> + purrr::map(readr::read_tsv) |> + dplyr::bind_rows() |> + # add columns for panglao ontology and consensus + # first add panglao ontology + dplyr::left_join(panglao_ref_df, by = c("cellassign_celltype_annotation" = "original_panglao_name")) |> + # now add in all the blueprint columns + dplyr::left_join(blueprint_df, by = c("singler_celltype_ontology" = "blueprint_ontology")) |> + # then add consensus labels + dplyr::left_join(consensus_ref_df, + by = c( + "singler_celltype_ontology" = "blueprint_ontology", + "cellassign_celltype_annotation" = "original_panglao_name", + "panglao_ontology" + ) + ) |> + # use unknown for NA annotation but keep ontology ID as NA + dplyr::mutate(consensus_annotation = dplyr::if_else(is.na(consensus_annotation), "Unknown", consensus_annotation)) + + # export file + readr::write_tsv(all_cells_df, opt$output_file) +} From 9a838b92c0688c8b0dc92f6351079b1eaf8dbce3 Mon Sep 17 00:00:00 2001 From: Ally Hawkins Date: Wed, 15 Jan 2025 13:14:02 -0600 Subject: [PATCH 24/25] Revert "comment out other modules for faster testing" This reverts commit d3b0ad141028698a8d597c2d163a38acd95a36f8. --- main.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/main.nf b/main.nf index be536ab..a408c0d 100644 --- a/main.nf +++ b/main.nf @@ -51,13 +51,13 @@ workflow { .filter{ run_all || it[1] in project_ids } // Run the merge workflow - //merge_sce(sample_ch) + merge_sce(sample_ch) // Run the doublet detection workflow - //detect_doublets(sample_ch) + detect_doublets(sample_ch) // Run the seurat conversion workflow - //seurat_conversion(sample_ch) + seurat_conversion(sample_ch) // Run the consensus cell type workflow cell_type_consensus(sample_ch) From 333f444df15b99131996da6cea9a1f7ef9f143eb Mon Sep 17 00:00:00 2001 From: Ally Hawkins Date: Wed, 15 Jan 2025 13:14:15 -0600 Subject: [PATCH 25/25] Revert "temporarily terminate if fail" This reverts commit b3caf00b97feb0a98417c2d678c63354a98d16e4. --- modules/cell-type-consensus/main.nf | 2 -- 1 file changed, 2 deletions(-) diff --git a/modules/cell-type-consensus/main.nf b/modules/cell-type-consensus/main.nf index 54e9afc..b10408d 100644 --- a/modules/cell-type-consensus/main.nf +++ b/modules/cell-type-consensus/main.nf @@ -5,7 +5,6 @@ process save_celltypes { container params.consensus_cell_type_container tag "${sample_id}" - errorStrategy 'terminate' input: tuple val(sample_id), val(project_id), @@ -41,7 +40,6 @@ process save_celltypes { process assign_consensus { container params.consensus_cell_type_container - errorStrategy 'terminate' tag "${project_id}" label 'mem_8' publishDir "${params.results_bucket}/${params.release_prefix}/cell-type-consensus", mode: 'copy'