forked from CIaran-Lundy/algorithm_benchmarking
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest.nf
165 lines (134 loc) · 4.26 KB
/
test.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
#!/usr/bin/env nextflow
// ---------------------------------------------------------------------------
// Default pipeline parameters.
// Every value is an absolute path string; the processes below read them via
// `from params.<name>`.
// ---------------------------------------------------------------------------

// Paired-end FASTQ inputs, passed to bowtie2 as -1 / -2 in ALIGN.
params.left_reads = "/alignment_data/trunc1.fq"
params.right_reads = "/alignment_data/trunc2.fq"
// Reference FASTA, used by MARK_DUPLICATES, MAKE_BQSR_TABLE and HAPLOTYPE_CALLER.
params.reference_genome_fasta = "/data/GCA_000001405.15_GRCh38_full_analysis_set.fna"
// Handed to bowtie2 `-x` in ALIGN as a plain value (`val`), so it is NOT staged
// into the task work directory -- presumably an index basename rather than a
// single file; confirm against the index layout on disk.
params.bowtie_index = "/data/bowtie2_index/GCA_000001405.15_GRCh38_full_analysis_set.fna.bowtie_index"
// Known-sites VCF given to `gatk BaseRecalibrator --known-sites`, plus its index
// which is staged alongside it in MAKE_BQSR_TABLE.
params.known_sites_vcf = "/data/Homo_sapiens_assembly38.dbsnp138.vcf"
params.known_sites_vcf_index = "/data/Homo_sapiens_assembly38.dbsnp138.vcf.idx"
// Companion files of the reference FASTA (sequence dictionary and faidx index);
// staged next to the FASTA in the GATK processes but never named on a command line.
params.reference_genome_dict = "/data/GCA_000001405.15_GRCh38_full_analysis_set.dict"
params.reference_genome_fai_index = "/data/GCA_000001405.15_GRCh38_full_analysis_set.fna.fai"
/*
 * ALIGN
 * Runs bowtie2 on the paired-end reads and captures its standard output in a
 * file called `aligned_bam`, which is emitted on the `aligned` channel.
 *
 * Inputs
 *   left_reads / right_reads : FASTQ files, staged under those fixed names.
 *   bowtie_index             : passed through as a value (not staged) and
 *                              given to `-x`.
 *
 * Notes
 *   - The read-group id/sample/library strings and the thread count (-p 32)
 *     are hard-coded.
 *   - The `\t` sequences inside --rg are expanded to real tab characters by
 *     Groovy before the shell sees the command.
 *   - Despite the output name, the file holds whatever bowtie2 prints on
 *     stdout (SAM text by default, per the bowtie2 manual).
 */
process ALIGN {
    input:
    path 'left_reads' from params.left_reads
    path 'right_reads' from params.right_reads
    val 'bowtie_index' from params.bowtie_index

    output:
    path 'aligned_bam' into aligned

    script:
    """
    bowtie2 \
        --rg-id 'definitelyarealid' \
        --rg 'SM:samplemcsampleid\tLB:libraryname\tPL:ILLUMINA' \
        -p 32 \
        -q \
        -x ${bowtie_index} \
        -1 ${left_reads} \
        -2 ${right_reads} \
        > aligned_bam
    """
}
/*
 * FIXMATES
 * Takes the alignment emitted on `aligned` and runs `samtools fixmate -m` on
 * it, writing the result to `aligned_bam_f_mates` (channel `fixed_mates`).
 */
process FIXMATES {
    input:
    path 'aligned_bam' from aligned

    output:
    path 'aligned_bam_f_mates' into fixed_mates

    script:
    """
    samtools fixmate \
        -m ${aligned_bam} \
        aligned_bam_f_mates
    """
}
/*
 * COORDINATE_SORT
 * Sorts the mate-fixed alignment with `samtools sort` and emits
 * `aligned_bam_f_mates_coord_sort` on the `coord_sorted` channel.
 *
 * NOTE(review): the temporary-file prefix (-T /data/sort123_) is a fixed
 * location outside the task work directory, so concurrent runs would share
 * it -- consider making it task-local.
 */
process COORDINATE_SORT {
    input:
    path 'aligned_bam_f_mates' from fixed_mates

    output:
    path 'aligned_bam_f_mates_coord_sort' into coord_sorted

    script:
    """
    samtools sort \
        -T /data/sort123_ \
        ${aligned_bam_f_mates} \
        -o aligned_bam_f_mates_coord_sort
    """
}
/*
 * MARK_DUPLICATES
 * Runs `samtools markdup` on the coordinate-sorted alignment.
 *
 * The result is needed by three downstream processes. A DSL1 queue channel
 * may be used as a process input only once, so the output is fanned out into
 * one dedicated channel per consumer:
 *   marked_duplicates_for_cram   -> WRITE_TO_CRAM_FILE
 *   marked_duplicates_for_bqsr   -> MAKE_BQSR_TABLE
 *   marked_duplicates_for_index  -> INDEX_CRAM
 */
process MARK_DUPLICATES {
    input:
    path 'aligned_bam_f_mates_coord_sort' from coord_sorted
    path 'reference_genome_fasta' from params.reference_genome_fasta

    output:
    path 'aligned_bam_f_mates_coord_sort_mrkd_dups' into marked_duplicates_for_cram, marked_duplicates_for_bqsr, marked_duplicates_for_index

    script:
    """
    samtools markdup \
        --reference $reference_genome_fasta \
        $aligned_bam_f_mates_coord_sort \
        aligned_bam_f_mates_coord_sort_mrkd_dups
    """
}

/*
 * WRITE_TO_CRAM_FILE
 * Converts the duplicate-marked alignment to CRAM.
 *
 * `samtools view -C` performs a real format conversion; simply copying the
 * bytes into a file with a `.cram` suffix would leave the data in its
 * original format under a misleading name. CRAM encoding needs the reference,
 * so the FASTA is staged under its real file name together with its .fai
 * index (the index is not named on the command line; it only has to sit next
 * to the FASTA).
 */
process WRITE_TO_CRAM_FILE {
    input:
    path 'aligned_bam_f_mates_coord_sort_mrkd_dups' from marked_duplicates_for_cram
    path reference_genome_fasta from params.reference_genome_fasta
    path reference_genome_fai_index from params.reference_genome_fai_index

    output:
    path 'aligned_bam_f_mates_coord_sort_mrkd_dups.cram'

    script:
    """
    samtools view \
        -C \
        -T $reference_genome_fasta \
        -o aligned_bam_f_mates_coord_sort_mrkd_dups.cram \
        $aligned_bam_f_mates_coord_sort_mrkd_dups
    """
}

/*
 * MAKE_BQSR_TABLE
 * Runs `gatk BaseRecalibrator` on the duplicate-marked alignment and writes
 * the recalibration table to `BQSR_table`.
 *
 * The sequence dictionary, the .fai index and the known-sites index are
 * declared as inputs only so that they are staged beside the files they
 * belong to; none of them appears on the command line.
 *
 * The table has two consumers, so it is emitted on two channels:
 *   BQSR_table_for_export -> WRITE_BQSR_TABLE
 *   BQSR_table_for_plots  -> ANALYSE_COVARIATES
 */
process MAKE_BQSR_TABLE {
    input:
    path aligned_bam_f_mates_coord_sort_mrkd_dups from marked_duplicates_for_bqsr
    path known_sites_vcf from params.known_sites_vcf
    path reference_genome_fasta from params.reference_genome_fasta
    path reference_genome_dict from params.reference_genome_dict
    path reference_genome_fai_index from params.reference_genome_fai_index
    path known_sites_vcf_index from params.known_sites_vcf_index

    output:
    path 'BQSR_table' into BQSR_table_for_export, BQSR_table_for_plots

    script:
    """
    gatk BaseRecalibrator \
        --input $aligned_bam_f_mates_coord_sort_mrkd_dups \
        --known-sites $known_sites_vcf \
        --reference $reference_genome_fasta \
        --output BQSR_table
    """
}

/*
 * WRITE_BQSR_TABLE
 * Copies the recalibration table to a file named `BQSR.table`.
 */
process WRITE_BQSR_TABLE {
    input:
    path 'BQSR_table' from BQSR_table_for_export

    output:
    path 'BQSR.table'

    script:
    """
    cat $BQSR_table > BQSR.table
    """
}

/*
 * ANALYSE_COVARIATES
 * Runs `gatk AnalyzeCovariates` on the recalibration table and produces
 * `analyze_covariates_plots.pdf`.
 */
process ANALYSE_COVARIATES {
    input:
    path 'BQSR_table' from BQSR_table_for_plots

    output:
    path 'analyze_covariates_plots.pdf'

    script:
    """
    gatk AnalyzeCovariates -bqsr $BQSR_table -plots analyze_covariates_plots.pdf
    """
}

/*
 * INDEX_CRAM
 * Copies the duplicate-marked alignment to a `.bam`-suffixed file and builds
 * an index for it with `samtools index`, written as `<name>.bam.bai`.
 * Both files are emitted together as one tuple on the `bai` channel so that
 * the consumer receives the alignment and its index side by side.
 * (Despite the process name, the outputs are .bam / .bam.bai files.)
 */
process INDEX_CRAM {
    input:
    path 'aligned_bam_f_mates_coord_sort_mrkd_dups' from marked_duplicates_for_index

    output:
    set file("aligned_bam_f_mates_coord_sort_mrkd_dups.bam"), file("aligned_bam_f_mates_coord_sort_mrkd_dups.bam.bai") into bai

    script:
    """
    cat $aligned_bam_f_mates_coord_sort_mrkd_dups > aligned_bam_f_mates_coord_sort_mrkd_dups.bam
    samtools index $aligned_bam_f_mates_coord_sort_mrkd_dups aligned_bam_f_mates_coord_sort_mrkd_dups.bam.bai
    """
}
/*
 * HAPLOTYPE_CALLER
 * Runs `gatk HaplotypeCaller` on the alignment received from the `bai`
 * channel and writes the calls to `haplotype_caller_output`
 * (channel `haplotype_caller_output_channel`).
 *
 * The `bai` channel delivers a two-element tuple: the alignment file and its
 * index. The index, the reference .fai and the sequence dictionary are staged
 * into the work directory but are not named on the command line.
 */
process HAPLOTYPE_CALLER {
    input:
    path reference_genome_fasta from params.reference_genome_fasta
    path reference_genome_fai_index from params.reference_genome_fai_index
    path reference_genome_dict from params.reference_genome_dict
    set file(aligned_bam_f_mates_coord_sort_mrkd_dups), file(aligned_bam_f_mates_coord_sort_mrkd_dups_index) from bai

    output:
    path 'haplotype_caller_output' into haplotype_caller_output_channel

    script:
    """
    gatk HaplotypeCaller \
        --input ${aligned_bam_f_mates_coord_sort_mrkd_dups} \
        --reference ${reference_genome_fasta} \
        --output haplotype_caller_output
    """
}