Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Nanopore support #134

Merged
merged 11 commits into from
Jul 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions configs/conda.config
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
process {
withLabel: hisat2 { conda = "$baseDir/envs/hisat2.yaml" }
withLabel: minimap2 { conda = "$baseDir/envs/minimap2.yaml" }
withLabel: python3 { conda = "$baseDir/envs/python3.yaml" }
withLabel: deseq2 { conda = "$baseDir/envs/deseq2.yaml" }
withLabel: fastp { conda = "$baseDir/envs/fastp.yaml" }
withLabel: fastqc { conda = "$baseDir/envs/fastqc.yaml" }
withLabel: nanoplot { conda = "$baseDir/envs/nanoplot.yaml" }
withLabel: subread { conda = "$baseDir/envs/subread.yaml" }
withLabel: multiqc { conda = "$baseDir/envs/multiqc.yaml" }
withLabel: sortmerna { conda = "$baseDir/envs/sortmerna.yaml" }
Expand Down
2 changes: 2 additions & 0 deletions configs/container.config
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
process {
withLabel: hisat2 { container = 'nanozoo/hisat2:2.1.0--66dae66' }
withLabel: minimap2 { container = 'nanozoo/minimap2:2.18--618fb68' }
withLabel: python3 { container = 'nanozoo/python_rnaseq:3.8--7a7808c' }
withLabel: deseq2 { container = 'nanozoo/deseq2:1.28.0--0df1612' }
withLabel: fastp { container = 'nanozoo/fastp:0.20.0--78a7c63' }
withLabel: fastqc { container = 'nanozoo/fastqc:0.11.8--fbfa1d7' }
withLabel: subread { container = 'nanozoo/subread:2.0.1--713a8e7' }
withLabel: multiqc { container = 'nanozoo/multiqc:1.9--aba729b' }
withLabel: nanoplot { container = 'nanozoo/nanoplot:1.38.0--11e5b96' }
withLabel: sortmerna { container = 'nanozoo/sortmerna:2.1b--ceea1a1' }
withLabel: trinity { container = 'nanozoo/trinity:2.11.0--3354a71' }
withLabel: stringtie { container = 'nanozoo/stringtie:2.1.2--93e41f1' }
Expand Down
2 changes: 2 additions & 0 deletions configs/local.config
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
process {
withLabel: hisat2 { cpus = params.cores }
withLabel: minimap2 { cpus = params.cores }
withLabel: deseq2 { cpus = params.cores }
withLabel: fastp { cpus = params.cores }
withLabel: fastqc { cpus = 1 }
withLabel: subread { cpus = params.cores }
withLabel: multiqc { cpus = params.cores }
withLabel: nanoplot { cpus = 4 }
withLabel: sortmerna { cpus = params.cores }
withLabel: trinity { cpus = params.cores; memory = params.memory }
withLabel: stringtie { cpus = params.cores}
Expand Down
6 changes: 6 additions & 0 deletions envs/minimap2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
name: minimap2
channels:
- bioconda
dependencies:
- minimap2=2.18
- samtools=1.11
7 changes: 7 additions & 0 deletions envs/nanoplot.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
name: nanoplot
channels:
- bioconda
- conda-forge
dependencies:
- nanoplot=1.38.0
- seaborn==0.10.1
141 changes: 106 additions & 35 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ nextflow.enable.dsl=2

// Parameters sanity checking

Set valid_params = ['max_cores', 'cores', 'memory', 'profile', 'help', 'reads', 'genome', 'annotation', 'deg', 'autodownload', 'pathway', 'species', 'include_species', 'strand', 'mode', 'tpm', 'fastp_additional_params', 'hisat2_additional_params', 'featurecounts_additional_params', 'feature_id_type', 'busco_db', 'dammit_uniref90', 'skip_sortmerna', 'assembly', 'output', 'fastp_dir', 'sortmerna_dir', 'hisat2_dir', 'featurecounts_dir', 'tpm_filter_dir', 'annotation_dir', 'deseq2_dir', 'assembly_dir', 'rnaseq_annotation_dir', 'uniref90_dir', 'multiqc_dir', 'nf_runinfo_dir', 'permanentCacheDir', 'condaCacheDir', 'singularityCacheDir', 'softlink_results', 'cloudProcess', 'permanent-cache-dir', 'conda-cache-dir', 'singularity-cache-dir', 'cloud-process'] // don't ask me why there is 'permanent-cache-dir', 'conda-cache-dir', 'singularity-cache-dir', 'cloud-process'
Set valid_params = ['max_cores', 'cores', 'memory', 'profile', 'help', 'reads', 'genome', 'nanopore', 'minimap2_additional_params', 'minimap2_dir', 'annotation', 'deg', 'autodownload', 'pathway', 'species', 'include_species', 'strand', 'mode', 'tpm', 'fastp_additional_params', 'hisat2_additional_params', 'featurecounts_additional_params', 'feature_id_type', 'busco_db', 'dammit_uniref90', 'skip_sortmerna', 'assembly', 'output', 'fastp_dir', 'sortmerna_dir', 'hisat2_dir', 'featurecounts_dir', 'tpm_filter_dir', 'annotation_dir', 'deseq2_dir', 'assembly_dir', 'rnaseq_annotation_dir', 'uniref90_dir', 'readqc_dir', 'multiqc_dir', 'nf_runinfo_dir', 'permanentCacheDir', 'condaCacheDir', 'singularityCacheDir', 'softlink_results', 'cloudProcess', 'permanent-cache-dir', 'conda-cache-dir', 'singularity-cache-dir', 'cloud-process'] // don't ask me why there is 'permanent-cache-dir', 'conda-cache-dir', 'singularity-cache-dir', 'cloud-process'
def parameter_diff = params.keySet() - valid_params
if (parameter_diff.size() != 0){
exit 1, "ERROR: Parameter(s) $parameter_diff is/are not valid in the pipeline!\n"
Expand Down Expand Up @@ -80,6 +80,11 @@ if (params.assembly) {
}
}

if (params.nanopore) {
println "\u001B[32mPerform processing of reads in Nanopore mode instead default short-read mode. After mapping, the same steps are used as for Illumina."
}


Set species = ['hsa', 'eco', 'mmu', 'mau']
Set autodownload = ['hsa', 'eco', 'mmu', 'mau']
Set pathway = ['hsa', 'mmu', 'mau']
Expand Down Expand Up @@ -115,6 +120,7 @@ log.info """\
Strandness: $params.strand
TPM threshold: $params.tpm
Comparisons: $comparison
Nanopore mode: $params.nanopore
"""
.stripIndent()

Expand All @@ -138,10 +144,10 @@ if (params.reads) {
return [ sample, read, condition, source ]
}
.tap { annotated_reads }
.tap { illumina_input_ch }
.tap { read_input_ch }
.map { sample, read, condition, source ->
return [ sample, [ read ] ] }
.set { illumina_input_ch }
.set { read_input_ch }

} else {
Channel
Expand All @@ -156,10 +162,10 @@ if (params.reads) {
return [ sample, read1, read2, condition, source ]
}
.tap { annotated_reads }
.tap { illumina_input_ch }
.tap { read_input_ch }
.map { sample, read1, read2, condition, source ->
return [ sample, [ read1, read2 ] ] }
.set { illumina_input_ch }
.set { read_input_ch }
}
}

Expand Down Expand Up @@ -324,17 +330,20 @@ include {referenceGet; concat_genome} from './modules/referenceGet'
include {annotationGet; concat_annotation} from './modules/annotationGet'
include {sortmernaGet} from './modules/sortmernaGet'
include {hisat2index} from './modules/hisat2'
include {minimap2index} from './modules/minimap2'
include {buscoGetDB} from './modules/buscoGetDB'
include {dammitGetDB} from './modules/dammitGetDB'

// analysis
include {fastp} from './modules/fastp'
include {sortmerna} from './modules/sortmerna'
include {hisat2; index_bam} from './modules/hisat2'
include {minimap2; index_bam_minimap2} from './modules/minimap2'
include {featurecounts} from './modules/featurecounts'
include {tpm_filter} from './modules/tpm_filter'
include {deseq2} from './modules/deseq2'
include {fastqc as fastqcPre; fastqc as fastqcPost} from './modules/fastqc'
include {nanoplot as nanoplot} from './modules/nanoplot'
include {multiqc; multiqc_sample_names} from './modules/multiqc'

// assembly & annotation
Expand Down Expand Up @@ -474,20 +483,20 @@ workflow download_dammit {
**************************/

/***************************************
Preprocess RNA-Seq reads: qc, trimming, adapters, rRNA-removal, mapping
Preprocess Illumina RNA-Seq reads: qc, trimming, adapters, rRNA-removal, mapping
*/
workflow preprocess {
workflow preprocess_illumina {
take:
illumina_input_ch
read_input_ch
reference
sortmerna_db

main:
// initial QC of raw reads
fastqcPre(illumina_input_ch)
fastqcPre(read_input_ch)

// trim with fastp
fastp(illumina_input_ch)
fastp(read_input_ch)

// QC after fastp
fastqcPost(fastp.out.sample_trimmed)
Expand All @@ -513,13 +522,54 @@ workflow preprocess {
sample_bam_ch = hisat2.out.sample_bam
fastp_json_report = fastp.out.json_report
sortmerna_log
hisat2_log = hisat2.out.log
fastqcPre = fastqcPre.out.zip
fastqcPost = fastqcPost.out.zip
mapping_log = hisat2.out.log
readqcPre = fastqcPre.out.zip
readqcPost = fastqcPost.out.zip
cleaned_reads_ch = sortmerna_no_rna_fastq
}

/***************************************
Preprocess Illumina RNA-Seq reads: qc, trimming, adapters, rRNA-removal, mapping
*/
workflow preprocess_nanopore {
take:
read_input_ch
reference
sortmerna_db

main:
// initial QC of raw reads
nanoplot(read_input_ch)

if ( params.skip_sortmerna ) {
sortmerna_no_rna_fastq = read_input_ch
sortmerna_log = Channel.empty()
} else {
// remove rRNA with SortmeRNA
sortmerna(read_input_ch, extract_tar_bz2(sortmerna_db))
sortmerna_no_rna_fastq = sortmerna.out.no_rna_fastq
sortmerna_log = sortmerna.out.log
}

// Minimap2 index
minimap2index(reference)
// map with Minimap2
minimap2(sortmerna_no_rna_fastq, minimap2index.out, params.minimap2_additional_params)
// index BAM files
index_bam_minimap2(minimap2.out.sample_bam)

emit:
sample_bam_ch = minimap2.out.sample_bam
fastp_json_report = Channel.empty()
sortmerna_log
mapping_log = minimap2.out.log
readqcPre = nanoplot.out.zip
readqcPost = Channel.empty()
cleaned_reads_ch = sortmerna_no_rna_fastq
}



/******************************************
Differential gene expression analysis using a genome reference
*/
Expand All @@ -528,9 +578,9 @@ workflow expression_reference_based {
sample_bam_ch
fastp_json_report
sortmerna_log
hisat2_log
fastqcPre
fastqcPost
mapping_log
readqcPre
readqcPost
annotation
deg_comparisons_input_ch
deseq2_script
Expand Down Expand Up @@ -580,21 +630,21 @@ workflow expression_reference_based {
.map { it.join(",") }

// run DEseq2
deseq2(regionReport_config, tpm_filter.out.filtered_counts, annotated_sample.condition.collect(),
annotated_sample.col_label.collect(), deseq2_comparisons, format_annotation.out, format_annotation_gene_rows.out,
annotated_sample.source.collect(), species_pathway_ch, deseq2_script, deseq2_id_type_ch, deseq2_script_refactor_reportingtools_table,
deseq2_script_improve_deseq_table)
// deseq2(regionReport_config, tpm_filter.out.filtered_counts, annotated_sample.condition.collect(),
// annotated_sample.col_label.collect(), deseq2_comparisons, format_annotation.out, format_annotation_gene_rows.out,
// annotated_sample.source.collect(), species_pathway_ch, deseq2_script, deseq2_id_type_ch, deseq2_script_refactor_reportingtools_table,
// deseq2_script_improve_deseq_table)

// run MultiQC
multiqc_sample_names(annotated_reads.map{ row -> row[0..-3]}.collect())
multiqc(multiqc_config,
multiqc_sample_names.out,
fastp_json_report.collect(),
fastp_json_report.collect().ifEmpty([]),
sortmerna_log.collect().ifEmpty([]),
hisat2_log.collect(),
mapping_log.collect(),
featurecounts.out.log.collect(),
fastqcPre.collect(),
fastqcPost.collect(),
readqcPre.collect().ifEmpty([]),
readqcPost.collect().ifEmpty([]),
tpm_filter.out.stats,
params.tpm
)
Expand Down Expand Up @@ -687,8 +737,12 @@ workflow {
sortmerna_db = Channel.empty()
}

// preprocess RNA-Seq reads
preprocess(illumina_input_ch, reference, sortmerna_db)
// preprocess RNA-Seq reads (Illumina or Nanopore)
if (!params.nanopore) {
preprocess_illumina(read_input_ch, reference, sortmerna_db)
} else {
preprocess_nanopore(read_input_ch, reference, sortmerna_db)
}

// perform assembly & annotation
if (params.assembly) {
Expand All @@ -702,18 +756,33 @@ workflow {
} else {
// perform expression analysis
// start reference-based differential gene expression analysis
expression_reference_based(preprocess.out.sample_bam_ch,
preprocess.out.fastp_json_report,
preprocess.out.sortmerna_log,
preprocess.out.hisat2_log,
preprocess.out.fastqcPre,
preprocess.out.fastqcPost,
if (!params.nanopore) {
expression_reference_based(preprocess_illumina.out.sample_bam_ch,
preprocess_illumina.out.fastp_json_report,
preprocess_illumina.out.sortmerna_log,
preprocess_illumina.out.mapping_log,
preprocess_illumina.out.readqcPre,
preprocess_illumina.out.readqcPost,
annotation,
deg_comparisons_input_ch,
deseq2_script,
deseq2_script_refactor_reportingtools_table,
deseq2_script_improve_deseq_table,
multiqc_config)
} else {
expression_reference_based(preprocess_nanopore.out.sample_bam_ch,
preprocess_nanopore.out.fastp_json_report,
preprocess_nanopore.out.sortmerna_log,
preprocess_nanopore.out.mapping_log,
preprocess_nanopore.out.readqcPre,
preprocess_nanopore.out.readqcPost,
annotation,
deg_comparisons_input_ch,
deseq2_script,
deseq2_script_refactor_reportingtools_table,
deseq2_script_improve_deseq_table,
multiqc_config)
}
}
}

Expand Down Expand Up @@ -758,10 +827,12 @@ def helpMSG() {

${c_yellow}Preprocessing options:${c_reset}
--mode Either 'single' (single-end) or 'paired' (paired-end) sequencing [default: $params.mode]
--fastp_additional_params additional parameters for fastp [default: $params.fastp_additional_params]
--fastp_additional_params Additional parameters for fastp [default: $params.fastp_additional_params]
--skip_sortmerna Skip rRNA removal via SortMeRNA [default: $params.skip_sortmerna]
--hisat2_additional_params additional parameters for HISAT2 [default: $params.hisat2_additional_params]
--featurecounts_additional_params additional parameters for FeatureCounts [default: $params.featurecounts_additional_params]
--hisat2_additional_params Additional parameters for HISAT2 [default: $params.hisat2_additional_params]
--minimap2_additional_params Additional parameters for minimap2 (Nanopore input) [default: $params.minimap2_additional_params]
--featurecounts_additional_params Additional parameters for FeatureCounts [default: $params.featurecounts_additional_params]
--nanopore Activate Nanopore long-read mode (default is Illumina data) [default: $params.nanopore]

${c_yellow}DEG analysis options:${c_reset}
--strand 0 (unstranded), 1 (stranded) and 2 (reversely stranded) [default: $params.strand]
Expand Down
12 changes: 9 additions & 3 deletions modules/featurecounts.nf
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,15 @@ process featurecounts {

script:
if (params.mode == 'single') {
"""
featureCounts -T ${task.cpus} -s ${params.strand} -a ${annotation} -o ${name}.counts.tsv ${additionalParams} ${bam}
"""
if (params.nanopore == true) {
"""
featureCounts -L -T ${task.cpus} -s ${params.strand} -a ${annotation} -o ${name}.counts.tsv ${additionalParams} ${bam}
"""
} else {
"""
featureCounts -T ${task.cpus} -s ${params.strand} -a ${annotation} -o ${name}.counts.tsv ${additionalParams} ${bam}
"""
}
}
else {
"""
Expand Down
Loading