v1.0/ATAC-seq_pipeline/pipeline-pe-blacklist-removal.cwl

#!/usr/bin/env cwl-runner
# Top-level description of the workflow document.
cwlVersion: v1.0
class: Workflow
doc: 'ATAC-seq pipeline - reads: PE - with blacklist removal'
# Optional CWL features this document declares.
#  - StepInputExpressionRequirement: the peak_call step sets one of its inputs
#    with `valueFrom`.
#  - SubworkflowFeatureRequirement: every step runs another .cwl file
#    (presumably sub-workflows -- confirm against those files).
#  - ScatterFeatureRequirement: no step in this file scatters directly;
#    presumably kept for the referenced files -- TODO confirm it is needed.
requirements:
   ScatterFeatureRequirement: {}
   SubworkflowFeatureRequirement: {}
   StepInputExpressionRequirement: {}
# Workflow-level inputs. Each entry is wired into one or more steps below;
# `type: string?` marks an optional input.
inputs:
   # --- Sequencing reads -----------------------------------------------------
   input_fastq_read1_files:
     doc: Input fastq paired-end read 1 files
     type: File[]
   input_fastq_read2_files:
     doc: Input fastq paired-end read 2 files
     type: File[]

   # --- Reference / annotation files ----------------------------------------
   ENCODE_blacklist_bedfile:
     doc: Bedfile containing ENCODE consensus blacklist regions to be excluded.
     type: File
   genome_sizes_file:
     doc: Genome sizes tab-delimited file (used in samtools)
     type: File
   default_adapters_file:
     doc: Adapters file
     type: File
   genome_effective_size:
     doc: >-
       Effective genome size used by MACS2. It can be numeric or a shortcut:
       'hs' for human (2.7e9), 'mm' for mouse (1.87e9), 'ce' for C. elegans
       (9e7) and 'dm' for fruitfly (1.2e8). Default: hs
     type: string
     default: hs
   genome_ref_first_index_file:
     doc: >-
       First index file of Bowtie reference genome with extension 1.ebwt.
       (Note: the rest of the index files MUST be in the same folder)
     type: File
     # Each leading `^` removes one extension from the primary file name, so
     # `^^` drops the trailing `.1.ebwt` before the listed suffix is appended.
     secondaryFiles:
       - ^^.2.ebwt
       - ^^.3.ebwt
       - ^^.4.ebwt
       - ^^.rev.1.ebwt
       - ^^.rev.2.ebwt
   as_narrowPeak_file:
     doc: Definition narrowPeak file in AutoSql format (used in bedToBigBed)
     type: File

   # --- Java tool locations and JVM options ---------------------------------
   trimmomatic_jar_path:
     doc: Trimmomatic Java jar file
     type: string
   trimmomatic_java_opts:
     doc: JVM arguments should be a quoted, space separated list (e.g. "-Xms128m -Xmx512m")
     type: string?
   picard_jar_path:
     doc: Picard Java jar file
     type: string
   picard_java_opts:
     doc: JVM arguments should be a quoted, space separated list (e.g. "-Xms128m -Xmx512m")
     type: string?

   # --- Per-step thread counts ----------------------------------------------
   nthreads_qc:
     doc: Number of threads required for the 01-qc step
     type: int
   nthreads_trimm:
     doc: Number of threads required for the 02-trim step
     type: int
   nthreads_map:
     doc: Number of threads required for the 03-map step
     type: int
   nthreads_peakcall:
     doc: Number of threads required for the 04-peakcall step
     type: int
   nthreads_quant:
     doc: Number of threads required for the 05-quantification step
     type: int
steps:
   # Runs 01-qc-pe.cwl on the raw read 1 / read 2 FASTQ files.
   # The output_custom_adapters_* results are consumed by the trimm step; the
   # remaining outputs are exposed as workflow outputs.
   qc:
     run: 01-qc-pe.cwl
     in:
       input_read1_fastq_files: input_fastq_read1_files
       input_read2_fastq_files: input_fastq_read2_files
       default_adapters_file: default_adapters_file
       nthreads: nthreads_qc
     out:
       # read 1
       - output_count_raw_reads_read1
       - output_diff_counts_read1
       - output_fastqc_report_files_read1
       - output_fastqc_data_files_read1
       - output_custom_adapters_read1
       # read 2
       - output_count_raw_reads_read2
       - output_diff_counts_read2
       - output_fastqc_report_files_read2
       - output_fastqc_data_files_read2
       - output_custom_adapters_read2
   # Runs 02-trim-pe.cwl on the raw FASTQ files, using the per-read adapter
   # files produced by the qc step. The Trimmomatic jar path and JVM options
   # are passed straight through from the workflow inputs.
   trimm:
     run: 02-trim-pe.cwl
     in:
       # read 1
       input_read1_fastq_files: input_fastq_read1_files
       input_read1_adapters_files: qc/output_custom_adapters_read1
       # read 2
       input_read2_fastq_files: input_fastq_read2_files
       input_read2_adapters_files: qc/output_custom_adapters_read2
       # tool configuration
       trimmomatic_jar_path: trimmomatic_jar_path
       trimmomatic_java_opts: trimmomatic_java_opts
       nthreads: nthreads_trimm
     out:
       - output_data_fastq_read1_trimmed_files
       - output_trimmed_read1_fastq_read_count
       - output_data_fastq_read2_trimmed_files
       - output_trimmed_read2_fastq_read_count
   # Runs 03-map-pe-blacklist-removal.cwl on the trimmed FASTQ files from the
   # trimm step. The blacklist bedfile, genome sizes file, Bowtie index file
   # and Picard settings come from the workflow inputs.
   map:
     run: 03-map-pe-blacklist-removal.cwl
     in:
       # trimmed reads
       input_fastq_read1_files: trimm/output_data_fastq_read1_trimmed_files
       input_fastq_read2_files: trimm/output_data_fastq_read2_trimmed_files
       # reference files
       ENCODE_blacklist_bedfile: ENCODE_blacklist_bedfile
       genome_sizes_file: genome_sizes_file
       genome_ref_first_index_file: genome_ref_first_index_file
       # tool configuration
       picard_jar_path: picard_jar_path
       picard_java_opts: picard_java_opts
       nthreads: nthreads_map
     out:
       # consumed by the peak_call and quant steps
       - output_data_sorted_dedup_bam_files
       # exposed as workflow outputs
       - output_data_sorted_dups_marked_bam_files
       - output_picard_mark_duplicates_files
       - output_pbc_files
       - output_bowtie_log
       - output_preseq_c_curve_files
       - output_percentage_uniq_reads
       - output_read_count_mapped
       - output_percent_mitochondrial_reads
   # Runs 04-peakcall-pe.cwl on the BAM files from
   # map/output_data_sorted_dedup_bam_files. All outputs listed here are
   # exposed as workflow outputs.
   peak_call:
     run: 04-peakcall-pe.cwl
     in:
       input_bam_files: map/output_data_sorted_dedup_bam_files
       # No source: the value is the fixed string BAMPE, supplied via
       # valueFrom (relies on StepInputExpressionRequirement).
       input_bam_format:
         valueFrom: BAMPE
       genome_effective_size: genome_effective_size
       input_genome_sizes: genome_sizes_file
       as_narrowPeak_file: as_narrowPeak_file
       nthreads: nthreads_peakcall
     out:
       - output_spp_x_cross_corr
       - output_spp_cross_corr_plot
       - output_read_in_peak_count_within_replicate
       - output_peak_file
       - output_peak_bigbed_file
       - output_peak_summits_file
       - output_extended_peak_file
       - output_peak_xls_file
       - output_filtered_read_count_file
       - output_peak_count_within_replicate
       # "unpaired" counterparts of the outputs above
       - output_unpaired_peak_file
       - output_unpaired_peak_bigbed_file
       - output_unpaired_peak_summits_file
       - output_unpaired_extended_peak_file
       - output_unpaired_peak_xls_file
       - output_unpaired_filtered_read_count_file
       - output_unpaired_peak_count_within_replicate
   # Runs 05-quantification.cwl on the same BAM files the peak_call step
   # consumes (map/output_data_sorted_dedup_bam_files); both outputs are
   # exposed as workflow outputs.
   quant:
     run: 05-quantification.cwl
     in:
       input_bam_files: map/output_data_sorted_dedup_bam_files
       input_genome_sizes: genome_sizes_file
       nthreads: nthreads_quant
     out: [bigwig_raw_files, bigwig_norm_files]
outputs:
   # ---- qc / trimm results, read 1 -------------------------------------------
   qc_fastqc_data_files_read1:
     type: File[]
     outputSource: qc/output_fastqc_data_files_read1
     doc: FastQC data files for paired_read1
   qc_fastqc_report_files_read1:
     type: File[]
     outputSource: qc/output_fastqc_report_files_read1
     doc: FastQC reports in zip format for paired_read1
   qc_count_raw_reads_read1:
     type: File[]
     outputSource: qc/output_count_raw_reads_read1
     doc: Raw read counts of fastq files for paired_read1 after QC
   qc_diff_counts_read1:
     type: File[]
     outputSource: qc/output_diff_counts_read1
     doc: Diff file between number of raw reads and number of reads counted by FASTQC, for paired_read1
   trimm_fastq_files_read1:
     type: File[]
     outputSource: trimm/output_data_fastq_read1_trimmed_files
     doc: FASTQ files  for paired_read1 after trimming
   trimm_raw_counts_read1:
     type: File[]
     outputSource: trimm/output_trimmed_read1_fastq_read_count
     doc: Raw read counts for paired_read1 of fastq files after trimming
   # ---- qc / trimm results, read 2 -------------------------------------------
   qc_fastqc_data_files_read2:
     type: File[]
     outputSource: qc/output_fastqc_data_files_read2
     doc: FastQC data files for paired_read2
   qc_fastqc_report_files_read2:
     type: File[]
     outputSource: qc/output_fastqc_report_files_read2
     doc: FastQC reports in zip format for paired_read2
   qc_count_raw_reads_read2:
     type: File[]
     outputSource: qc/output_count_raw_reads_read2
     doc: Raw read counts of fastq files for paired_read2 after QC
   qc_diff_counts_read2:
     type: File[]
     outputSource: qc/output_diff_counts_read2
     doc: Diff file between number of raw reads and number of reads counted by FASTQC, for paired_read2
   trimm_fastq_files_read2:
     type: File[]
     outputSource: trimm/output_data_fastq_read2_trimmed_files
     doc: FASTQ files  for paired_read2 after trimming
   trimm_raw_counts_read2:
     type: File[]
     outputSource: trimm/output_trimmed_read2_fastq_read_count
     doc: Raw read counts for paired_read2 of fastq files after trimming
   # ---- map step results -----------------------------------------------------
   map_read_count_mapped:
     doc: Read counts of the mapped BAM files
     type: File[]
     outputSource: map/output_read_count_mapped
   map_bowtie_log_files:
     doc: Bowtie log file with mapping stats
     type: File[]
     outputSource: map/output_bowtie_log
   map_preseq_percentage_uniq_reads:
     doc: Preseq percentage of uniq reads
     type: File[]
     outputSource: map/output_percentage_uniq_reads
   map_pbc_files:
     doc: PCR Bottleneck Coefficient files (used to flag samples when pbc<0.5)
     type: File[]
     outputSource: map/output_pbc_files
   # NOTE(review): this output is named "dedup" but is sourced from the map
   # step's duplicates-marked BAM output, whereas the peak_call and quant
   # steps consume map/output_data_sorted_dedup_bam_files. Confirm which file
   # set is meant to be published here before changing the wiring.
   map_dedup_bam_files:
     doc: Filtered BAM files (post-processing end point)
     type: File[]
     outputSource: map/output_data_sorted_dups_marked_bam_files
   map_mark_duplicates_files:
     doc: Summary of duplicates removed with Picard tool MarkDuplicates (for multiple reads aligned to the same positions)
     type: File[]
     outputSource: map/output_picard_mark_duplicates_files
   map_preseq_c_curve_files:
     doc: Preseq c_curve output files
     type: File[]
     outputSource: map/output_preseq_c_curve_files
   map_percent_mitochondrial_reads:
     doc: Percentage of mitochondrial reads
     type: File[]
     outputSource: map/output_percent_mitochondrial_reads
   # ---- peak_call step results -----------------------------------------------
   peakcall_peak_file:
     doc: Peaks in ENCODE Peak file format
     type: File[]
     outputSource: peak_call/output_peak_file
   peakcall_spp_x_cross_corr:
     doc: SPP strand cross correlation summary
     type: File[]
     outputSource: peak_call/output_spp_x_cross_corr
   peakcall_peak_xls_file:
     doc: Peak calling report file
     type: File[]
     outputSource: peak_call/output_peak_xls_file
   peakcall_peak_summits_file:
     doc: Peaks summits in bedfile format
     type: File[]
     outputSource: peak_call/output_peak_summits_file
   peakcall_peak_count_within_replicate:
     doc: Peak counts within replicate
     type: File[]
     outputSource: peak_call/output_peak_count_within_replicate
   peakcall_spp_x_cross_corr_plot:
     doc: SPP strand cross correlation plot
     type: File[]
     outputSource: peak_call/output_spp_cross_corr_plot
   peakcall_filtered_read_count_file:
     doc: Filtered read count after peak calling
     type: File[]
     outputSource: peak_call/output_filtered_read_count_file
   peakcall_extended_peak_file:
     doc: Extended fragment peaks in ENCODE Peak file format
     type: File[]
     outputSource: peak_call/output_extended_peak_file
   # Distinct from peakcall_peak_count_within_replicate: this one is sourced
   # from the step's read-in-peak count output, not its peak count output.
   peakcall_read_in_peak_count_within_replicate:
     doc: Read-in-peak counts within replicate
     type: File[]
     outputSource: peak_call/output_read_in_peak_count_within_replicate
   peakcall_peak_bigbed_file:
     doc: Peaks in bigBed format
     type: File[]
     outputSource: peak_call/output_peak_bigbed_file
   # ---- peak_call step results, each paired mate used independently ---------
   peakcall_output_unpaired_peak_xls_file:
     doc: Peak calling report file (*_peaks.xls file produced by MACS2) using each paired mate independently
     type: File[]
     outputSource: peak_call/output_unpaired_peak_xls_file
   peakcall_output_unpaired_extended_peak_file:
     doc: peakshift/phantomPeak extended fragment results file using each paired mate independently
     type: File[]
     outputSource: peak_call/output_unpaired_extended_peak_file
   peakcall_output_unpaired_peak_count_within_replicate:
     doc: Peak counts within replicate using each paired mate independently
     type: File[]
     outputSource: peak_call/output_unpaired_peak_count_within_replicate
   peakcall_output_unpaired_peak_bigbed_file:
     doc: peakshift/phantomPeak results bigbed file using each paired mate independently
     type: File[]
     outputSource: peak_call/output_unpaired_peak_bigbed_file
   peakcall_output_unpaired_peak_file:
     doc: peakshift/phantomPeak results file using each paired mate independently
     type: File[]
     outputSource: peak_call/output_unpaired_peak_file
   peakcall_output_unpaired_peak_summits_file:
     doc: File containing peak summits using each paired mate independently
     type: File[]
     outputSource: peak_call/output_unpaired_peak_summits_file
   peakcall_output_unpaired_filtered_read_count_file:
     doc: Filtered read count reported by MACS2 using each paired mate independently
     type: File[]
     outputSource: peak_call/output_unpaired_filtered_read_count_file
   # ---- quant step results ---------------------------------------------------
   quant_bigwig_raw_files:
     type: File[]
     outputSource: quant/bigwig_raw_files
     doc: Raw reads bigWig (signal) files
   quant_bigwig_norm_files:
     type: File[]
     outputSource: quant/bigwig_norm_files
     doc: Normalized reads bigWig (signal) files