# callers.yaml
# All paths are relative to the directory that Snakemake is executed in
# Note: this file is written in the YAML syntax (https://learnxinyminutes.com/docs/yaml/)
# CALLER SPECIFIC PARAMETERS
# These parameters are used by the pipeline when it executes each of the callers.
# All caller specific parameters are optional except where otherwise noted as
# "required" in the comments below.
# You can specify:
# 1) cols - any other columns (besides CHROM, POS, REF, and ALT) to extract from the VCFs output by each caller
# (note that this attribute will be ignored if your caller script outputs a TSV instead of a VCF)
# 2) params - any extra parameters that should be passed to the caller script
# 3) ext - whether your caller outputs a VCF (assumed if not specified) or a TSV
# 4) na - replacement values to use (instead of the default value of 0) when filling NAs
# (note that this is ignored if 'keep_na' is set to true in config.yaml or prepare.yaml)
# Each key should be the <caller_id> and each value can be a dictionary containing any of the attributes listed above
# "cols" should be a dictionary that maps each category of VCF columns ('info', 'format', or 'other' for all remaining columns) to a list of column names
# "params" can be a single string or a list of strings
# "na" should be a dictionary whose keys are column names and whose values are the replacement values for those columns
# Note that the trained classification models we provide in the example data
# only work with these settings. So if you change any of the "cols" or "na"
# values, you will also need to create new trained classification models.
# gatk-snp: extra columns to extract from the VCF output by this caller,
# grouped by category ('other' holds columns that are neither INFO nor FORMAT).
gatk-snp:
  cols:
    other: [QUAL]
    info: [QD, FS, MQ, AC, ExcessHet]
    format: [DP, GQ]
# varscan-snp: extra columns to extract from the VCF output by this caller,
# plus per-column replacement values for NAs.
varscan-snp:
  cols:
    info: [ADP]
    format: [SDP, DP, RD, AD, PVAL, ABQ, GQ, RBQ, RDF, RDR, ADF, ADR]
  na:
    # Value substituted for missing PVAL entries instead of the default 0.
    # NOTE(review): the exponent has no sign, so YAML 1.1 loaders such as
    # PyYAML read 1.01E0 as a string while YAML 1.2 loaders read a float --
    # confirm which one the pipeline expects.
    PVAL: 1.01E0
# vardict-snp: extra columns to extract from the VCF output by this caller,
# plus per-column replacement values for NAs.
vardict-snp:
  cols:
    other: [QUAL]
    info: [TYPE, DP, VD, AF, QSTD, MQ, SN, HIAF, ADJAF, NM, SVTYPE, SVLEN, DUPRATE]
  na:
    # Value substituted for missing SVTYPE entries instead of the default 0.
    SVTYPE: NC
pg-snp:
  # Path to a bgzipped, indexed VCF containing variants called in the
  # Platinum Genomes project.
  # required!
  # NOTE(review): this absolute path looks site-specific -- confirm that it
  # exists on the machine running the pipeline.
  params: /iblm/netapp/data1/external/PlatinumGenomes/2017-1.0/hg19/hybrid/hg19.hybrid.no_chr.vcf.gz
# gatk-indel: extra columns to extract from the VCF output by this caller,
# grouped by category.
gatk-indel:
  cols:
    info: [QD, FS, MQ, AC, ExcessHet]
    format: [DP, GQ]
# varscan-indel: extra columns to extract from the VCF output by this caller,
# plus per-column replacement values for NAs.
varscan-indel:
  cols:
    info: [ADP]
    format: [SDP, DP, RD, AD, PVAL, ABQ, GQ, RBQ, RDF, RDR, ADF, ADR]
  na:
    # Value substituted for missing PVAL entries instead of the default 0.
    # NOTE(review): the exponent has no sign, so YAML 1.1 loaders such as
    # PyYAML read 1.01E0 as a string while YAML 1.2 loaders read a float --
    # confirm which one the pipeline expects.
    PVAL: 1.01E0
# vardict-indel: extra columns to extract from the VCF output by this caller,
# plus per-column replacement values for NAs.
vardict-indel:
  cols:
    other: [QUAL]
    info: [TYPE, DP, VD, AF, QSTD, MQ, SN, HIAF, ADJAF, NM, SVTYPE, SVLEN, DUPRATE]
  na:
    # Value substituted for missing SVTYPE entries instead of the default 0.
    SVTYPE: NC
# delly: extra FORMAT columns to extract from the VCF output by this caller.
delly:
  cols:
    format: [RC, GQ]
# pindel: extra columns to extract from the VCF output by this caller,
# plus per-column replacement values for NAs.
pindel:
  cols:
    info: [HOMLEN, SVLEN, SVTYPE]
    format: [PL, RD]
  na:
    # Value substituted for missing SVTYPE entries instead of the default 0.
    SVTYPE: NC
# illumina-manta: extra columns to extract from the VCF output by this caller,
# plus per-column replacement values for NAs.
illumina-manta:
  cols:
    other: [QUAL]
    info: [SVTYPE, SVLEN, HOMLEN, BND_DEPTH, MATE_BND_DEPTH]
    format: [GQ]
  na:
    # Value substituted for missing SVTYPE entries instead of the default 0.
    SVTYPE: NC
# illumina-strelka: extra columns to extract from the VCF output by this
# caller, plus the positional parameters passed to the caller script.
illumina-strelka:
  cols:
    other: [QUAL]
    info: [REFREP, IDREP, MQ]
    format: [GQX, DP, DPF, MIN_DP, DPI, SB]
  params:
    # 1) path to strelka config file
    # required!
    - configs/configureStrelkaGermlineWorkflow.py.ini
    # 2) path to the directory in which strelka is stored, or "" if strelka
    #    is installed in your current conda env (or if using --use-conda)
    # required!
    - ""
# breakca: output type and the positional parameters passed to the caller
# script.
breakca:
  # specify that breakca creates a tsv instead of a vcf
  # required!
  ext: tsv
  params:
    # 1) the (relative) path to the breakCA scripts directory
    # required!
    - breakCA
    # 2) path to the directory in which the Rscript executable is held, or ""
    #    if R is installed in your current conda env (or if using --use-conda)
    # required!
    - ""
pg-indel:
  # Path to a bgzipped, indexed VCF containing variants called in the
  # Platinum Genomes project.
  # required!
  # NOTE(review): this absolute path looks site-specific -- confirm that it
  # exists on the machine running the pipeline.
  params: /iblm/netapp/data1/external/PlatinumGenomes/2017-1.0/hg19/hybrid/hg19.hybrid.no_chr.vcf.gz
# Custom parameters can also be passed to special scripts that perform pre-caller steps:
# illumina: positional parameters for the script that performs the pre-caller
# step of the same name.
illumina:
  params:
    # 1) path to manta config file
    # required!
    - configs/configManta.py.ini
    # 2) path to the directory in which manta is stored, or "" if manta is
    #    installed in your current conda env (or if using --use-conda)
    # required!
    - ""
pg:
  # Path to a bgzipped, indexed VCF containing variants called in the
  # Platinum Genomes project.
  # required!
  # NOTE(review): this absolute path looks site-specific -- confirm that it
  # exists on the machine running the pipeline.
  params: /iblm/netapp/data1/external/PlatinumGenomes/2017-1.0/hg19/hybrid/hg19.hybrid.no_chr.vcf.gz