-
Notifications
You must be signed in to change notification settings - Fork 17
/
Copy pathpipeline-pe-blacklist-removal.cwl
316 lines (316 loc) · 11.9 KB
/
pipeline-pe-blacklist-removal.cwl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
#!/usr/bin/env cwl-runner
# Top-level CWL document: a Workflow that chains the numbered step files
# (01-qc ... 05-quantification) referenced under `steps`.
cwlVersion: v1.0
class: Workflow
doc: 'ATAC-seq pipeline - reads: PE - with blacklist removal'
requirements:
  # NOTE(review): no step in this file uses `scatter` directly; presumably
  # kept for the referenced step files - confirm before removing.
  - class: ScatterFeatureRequirement
  # Every step under `steps` runs a separate .cwl file.
  # NOTE(review): presumably those files are themselves Workflows - confirm.
  - class: SubworkflowFeatureRequirement
  # Needed for the `valueFrom` on the peak_call step's input_bam_format.
  - class: StepInputExpressionRequirement
# Workflow-level inputs. Each one is forwarded unchanged to one or more of
# the steps declared under `steps`.
inputs:
  # --- Sequencing reads -----------------------------------------------------
  input_fastq_read1_files:
    doc: Input fastq paired-end read 1 files
    type: File[]
  input_fastq_read2_files:
    doc: Input fastq paired-end read 2 files
    type: File[]

  # --- Reference / annotation files ----------------------------------------
  ENCODE_blacklist_bedfile:
    doc: Bedfile containing ENCODE consensus blacklist regions to be excluded.
    type: File
  genome_sizes_file:
    doc: Genome sizes tab-delimited file (used in samtools)
    type: File
  default_adapters_file:
    doc: Adapters file
    type: File
  genome_effective_size:
    doc: >-
      Effective genome size used by MACS2. It can be a number or one of the
      shortcuts: 'hs' for human (2.7e9), 'mm' for mouse (1.87e9), 'ce' for
      C. elegans (9e7) and 'dm' for fruitfly (1.2e8). Default: hs
    type: string
    default: hs
  genome_ref_first_index_file:
    doc: >-
      First index file of Bowtie reference genome with extension 1.ebwt.
      (Note: the rest of the index files MUST be in the same folder)
    type: File
    # Each leading `^` strips one extension from the primary file name before
    # the suffix is appended, so `<prefix>.1.ebwt` pulls in its sibling
    # `<prefix>.2.ebwt`, `<prefix>.rev.1.ebwt`, etc.
    secondaryFiles:
      - ^^.2.ebwt
      - ^^.3.ebwt
      - ^^.4.ebwt
      - ^^.rev.1.ebwt
      - ^^.rev.2.ebwt
  as_narrowPeak_file:
    doc: Definition narrowPeak file in AutoSql format (used in bedToBigBed)
    type: File

  # --- Java tool locations and JVM options ---------------------------------
  trimmomatic_jar_path:
    doc: Trimmomatic Java jar file
    type: string
  trimmomatic_java_opts:
    doc: >-
      JVM arguments should be a quoted, space separated list
      (e.g. "-Xms128m -Xmx512m")
    type: string?
  picard_jar_path:
    doc: Picard Java jar file
    type: string
  picard_java_opts:
    doc: >-
      JVM arguments should be a quoted, space separated list
      (e.g. "-Xms128m -Xmx512m")
    type: string?

  # --- Per-step thread counts ----------------------------------------------
  nthreads_qc:
    doc: Number of threads required for the 01-qc step
    type: int
  nthreads_trimm:
    doc: Number of threads required for the 02-trim step
    type: int
  nthreads_map:
    doc: Number of threads required for the 03-map step
    type: int
  nthreads_peakcall:
    doc: Number of threads required for the 04-peakcall step
    type: int
  nthreads_quant:
    doc: Number of threads required for the 05-quantification step
    type: int
# Step graph: qc -> trimm -> map -> (peak_call, quant).
# Sources without a `step/` prefix are workflow-level inputs.
steps:
  # 01 - quality control on the raw reads; its custom adapter files are the
  # only qc outputs consumed by a later step (trimm).
  qc:
    run: 01-qc-pe.cwl
    in:
      input_read1_fastq_files: input_fastq_read1_files
      input_read2_fastq_files: input_fastq_read2_files
      default_adapters_file: default_adapters_file
      nthreads: nthreads_qc
    out:
      - output_count_raw_reads_read1
      - output_diff_counts_read1
      - output_fastqc_report_files_read1
      - output_fastqc_data_files_read1
      - output_custom_adapters_read1
      - output_count_raw_reads_read2
      - output_diff_counts_read2
      - output_fastqc_report_files_read2
      - output_fastqc_data_files_read2
      - output_custom_adapters_read2

  # 02 - trimming: takes the raw workflow FASTQ inputs plus the adapter
  # files produced by qc.
  trimm:
    run: 02-trim-pe.cwl
    in:
      input_read1_fastq_files: input_fastq_read1_files
      input_read1_adapters_files: qc/output_custom_adapters_read1
      input_read2_fastq_files: input_fastq_read2_files
      input_read2_adapters_files: qc/output_custom_adapters_read2
      trimmomatic_jar_path: trimmomatic_jar_path
      trimmomatic_java_opts: trimmomatic_java_opts
      nthreads: nthreads_trimm
    out:
      - output_data_fastq_read1_trimmed_files
      - output_trimmed_read1_fastq_read_count
      - output_data_fastq_read2_trimmed_files
      - output_trimmed_read2_fastq_read_count

  # 03 - mapping of the trimmed reads, with the ENCODE blacklist passed in.
  map:
    run: 03-map-pe-blacklist-removal.cwl
    in:
      input_fastq_read1_files: trimm/output_data_fastq_read1_trimmed_files
      input_fastq_read2_files: trimm/output_data_fastq_read2_trimmed_files
      ENCODE_blacklist_bedfile: ENCODE_blacklist_bedfile
      genome_sizes_file: genome_sizes_file
      genome_ref_first_index_file: genome_ref_first_index_file
      picard_jar_path: picard_jar_path
      picard_java_opts: picard_java_opts
      nthreads: nthreads_map
    out:
      - output_data_sorted_dedup_bam_files
      - output_data_sorted_dups_marked_bam_files
      - output_picard_mark_duplicates_files
      - output_pbc_files
      - output_bowtie_log
      - output_preseq_c_curve_files
      - output_percentage_uniq_reads
      - output_read_count_mapped
      - output_percent_mitochondrial_reads

  # 04 - peak calling on the dedup BAMs from map.
  peak_call:
    run: 04-peakcall-pe.cwl
    in:
      input_bam_files: map/output_data_sorted_dedup_bam_files
      # Constant with no source; this `valueFrom` is why the workflow
      # declares StepInputExpressionRequirement.
      input_bam_format:
        valueFrom: BAMPE
      genome_effective_size: genome_effective_size
      input_genome_sizes: genome_sizes_file
      as_narrowPeak_file: as_narrowPeak_file
      nthreads: nthreads_peakcall
    out:
      - output_spp_x_cross_corr
      - output_spp_cross_corr_plot
      - output_read_in_peak_count_within_replicate
      - output_peak_file
      - output_peak_bigbed_file
      - output_peak_summits_file
      - output_extended_peak_file
      - output_peak_xls_file
      - output_filtered_read_count_file
      - output_peak_count_within_replicate
      - output_unpaired_peak_file
      - output_unpaired_peak_bigbed_file
      - output_unpaired_peak_summits_file
      - output_unpaired_extended_peak_file
      - output_unpaired_peak_xls_file
      - output_unpaired_filtered_read_count_file
      - output_unpaired_peak_count_within_replicate

  # 05 - quantification (bigWig signal files) on the same dedup BAMs.
  quant:
    run: 05-quantification.cwl
    in:
      input_bam_files: map/output_data_sorted_dedup_bam_files
      input_genome_sizes: genome_sizes_file
      nthreads: nthreads_quant
    out:
      - bigwig_raw_files
      - bigwig_norm_files
# Workflow-level outputs. Every entry is a File[] re-exported from one step
# output via `outputSource: <step>/<step_output>`.
outputs:
  # --- qc / trimm, read 1 ---------------------------------------------------
  qc_fastqc_data_files_read1:
    doc: FastQC data files for paired_read1
    type: File[]
    outputSource: qc/output_fastqc_data_files_read1
  qc_fastqc_report_files_read1:
    doc: FastQC reports in zip format for paired_read1
    type: File[]
    outputSource: qc/output_fastqc_report_files_read1
  qc_count_raw_reads_read1:
    doc: Raw read counts of fastq files for paired_read1 after QC
    type: File[]
    outputSource: qc/output_count_raw_reads_read1
  qc_diff_counts_read1:
    doc: >-
      Diff file between number of raw reads and number of reads counted by
      FASTQC, for paired_read1
    type: File[]
    outputSource: qc/output_diff_counts_read1
  trimm_fastq_files_read1:
    doc: FASTQ files for paired_read1 after trimming
    type: File[]
    outputSource: trimm/output_data_fastq_read1_trimmed_files
  trimm_raw_counts_read1:
    doc: Raw read counts for paired_read1 of fastq files after trimming
    type: File[]
    outputSource: trimm/output_trimmed_read1_fastq_read_count

  # --- qc / trimm, read 2 ---------------------------------------------------
  qc_fastqc_data_files_read2:
    doc: FastQC data files for paired_read2
    type: File[]
    outputSource: qc/output_fastqc_data_files_read2
  qc_fastqc_report_files_read2:
    doc: FastQC reports in zip format for paired_read2
    type: File[]
    outputSource: qc/output_fastqc_report_files_read2
  qc_count_raw_reads_read2:
    doc: Raw read counts of fastq files for paired_read2 after QC
    type: File[]
    outputSource: qc/output_count_raw_reads_read2
  qc_diff_counts_read2:
    doc: >-
      Diff file between number of raw reads and number of reads counted by
      FASTQC, for paired_read2
    type: File[]
    outputSource: qc/output_diff_counts_read2
  trimm_fastq_files_read2:
    doc: FASTQ files for paired_read2 after trimming
    type: File[]
    outputSource: trimm/output_data_fastq_read2_trimmed_files
  trimm_raw_counts_read2:
    doc: Raw read counts for paired_read2 of fastq files after trimming
    type: File[]
    outputSource: trimm/output_trimmed_read2_fastq_read_count

  # --- map ------------------------------------------------------------------
  map_read_count_mapped:
    doc: Read counts of the mapped BAM files
    type: File[]
    outputSource: map/output_read_count_mapped
  map_bowtie_log_files:
    doc: Bowtie log file with mapping stats
    type: File[]
    outputSource: map/output_bowtie_log
  map_preseq_percentage_uniq_reads:
    doc: Preseq percentage of uniq reads
    type: File[]
    outputSource: map/output_percentage_uniq_reads
  map_pbc_files:
    doc: PCR Bottleneck Coefficient files (used to flag samples when pbc<0.5)
    type: File[]
    outputSource: map/output_pbc_files
  # NOTE(review): this output is named "dedup" but is sourced from the
  # duplicates-*marked* BAMs, whereas the peak_call and quant steps consume
  # map/output_data_sorted_dedup_bam_files. Confirm which set is intended
  # before changing either the name or the source.
  map_dedup_bam_files:
    doc: Filtered BAM files (post-processing end point)
    type: File[]
    outputSource: map/output_data_sorted_dups_marked_bam_files
  map_mark_duplicates_files:
    doc: >-
      Summary of duplicates removed with Picard tool MarkDuplicates (for
      multiple reads aligned to the same positions)
    type: File[]
    outputSource: map/output_picard_mark_duplicates_files
  map_preseq_c_curve_files:
    doc: Preseq c_curve output files
    type: File[]
    outputSource: map/output_preseq_c_curve_files
  map_percent_mitochondrial_reads:
    doc: Percentage of mitochondrial reads
    type: File[]
    outputSource: map/output_percent_mitochondrial_reads

  # --- peak_call (paired) ---------------------------------------------------
  peakcall_peak_file:
    doc: Peaks in ENCODE Peak file format
    type: File[]
    outputSource: peak_call/output_peak_file
  peakcall_spp_x_cross_corr:
    doc: SPP strand cross correlation summary
    type: File[]
    outputSource: peak_call/output_spp_x_cross_corr
  peakcall_peak_xls_file:
    doc: Peak calling report file
    type: File[]
    outputSource: peak_call/output_peak_xls_file
  peakcall_peak_summits_file:
    doc: Peaks summits in bedfile format
    type: File[]
    outputSource: peak_call/output_peak_summits_file
  peakcall_peak_count_within_replicate:
    doc: Peak counts within replicate
    type: File[]
    outputSource: peak_call/output_peak_count_within_replicate
  peakcall_spp_x_cross_corr_plot:
    doc: SPP strand cross correlation plot
    type: File[]
    outputSource: peak_call/output_spp_cross_corr_plot
  peakcall_filtered_read_count_file:
    doc: Filtered read count after peak calling
    type: File[]
    outputSource: peak_call/output_filtered_read_count_file
  peakcall_extended_peak_file:
    doc: Extended fragment peaks in ENCODE Peak file format
    type: File[]
    outputSource: peak_call/output_extended_peak_file
  # Doc previously duplicated the text of peakcall_peak_count_within_replicate;
  # reworded to match the step output this entry is sourced from.
  peakcall_read_in_peak_count_within_replicate:
    doc: Counts of reads in peaks within replicate
    type: File[]
    outputSource: peak_call/output_read_in_peak_count_within_replicate
  peakcall_peak_bigbed_file:
    doc: Peaks in bigBed format
    type: File[]
    outputSource: peak_call/output_peak_bigbed_file

  # --- peak_call (each paired mate treated independently) ------------------
  peakcall_output_unpaired_peak_xls_file:
    doc: >-
      Peak calling report file (*_peaks.xls file produced by MACS2) using each
      paired mate independently
    type: File[]
    outputSource: peak_call/output_unpaired_peak_xls_file
  peakcall_output_unpaired_extended_peak_file:
    doc: >-
      peakshift/phantomPeak extended fragment results file using each paired
      mate independently
    type: File[]
    outputSource: peak_call/output_unpaired_extended_peak_file
  peakcall_output_unpaired_peak_count_within_replicate:
    doc: Peak counts within replicate using each paired mate independently
    type: File[]
    outputSource: peak_call/output_unpaired_peak_count_within_replicate
  peakcall_output_unpaired_peak_bigbed_file:
    doc: >-
      peakshift/phantomPeak results bigbed file using each paired mate
      independently
    type: File[]
    outputSource: peak_call/output_unpaired_peak_bigbed_file
  peakcall_output_unpaired_peak_file:
    doc: peakshift/phantomPeak results file using each paired mate independently
    type: File[]
    outputSource: peak_call/output_unpaired_peak_file
  peakcall_output_unpaired_peak_summits_file:
    doc: File containing peak summits using each paired mate independently
    type: File[]
    outputSource: peak_call/output_unpaired_peak_summits_file
  peakcall_output_unpaired_filtered_read_count_file:
    doc: >-
      Filtered read count reported by MACS2 using each paired mate
      independently
    type: File[]
    outputSource: peak_call/output_unpaired_filtered_read_count_file

  # --- quant ----------------------------------------------------------------
  quant_bigwig_raw_files:
    doc: Raw reads bigWig (signal) files
    type: File[]
    outputSource: quant/bigwig_raw_files
  quant_bigwig_norm_files:
    doc: Normalized reads bigWig (signal) files
    type: File[]
    outputSource: quant/bigwig_norm_files