-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSnakefile
145 lines (132 loc) · 4.25 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import glob
import multiprocessing
import os
from python import down_ascn
from python import fasta_cleaner
from python import concatenate_fastas
from python import newick_tree_visualizer
# Dataset
sample_names = [os.path.splitext(os.path.basename(file))[0] for file in glob.glob("ascn/*.txt")]
rule all:
input:
"Tree.svg"
rule Download_ASCN:
input:
ascn="ascn/{sample}.txt"
output:
fasta="fasta/{sample}.fasta"
run:
down_ascn.download_sequences_from_asn_file(input.ascn,output.fasta)
rule Clean:
input:
fasta="fasta/{sample}.fasta"
output:
clean="fasta_clean/{sample}.fasta"
run:
fasta_cleaner.clean_fasta(input.fasta,output.clean)
rule Alignment:
input:
clean="fasta_clean/{sample}.fasta"
output:
align="fasta_align/{sample}.fasta"
params:
threads=multiprocessing.cpu_count()
shell:
"""
mafft --auto --thread {params.threads} {input.clean} > {output.align}
"""
rule Trim:
input:
align="fasta_align/{sample}.fasta"
output:
trim="trim/{sample}.fasta"
params:
threshold=80
shell:
"""
python3 python/trim_fasta_edges.py {input.align} {output.trim} {params.threshold}
"""
rule Best_AICc_model:
input:
trim="trim/{sample}.fasta"
output:
"trim/{sample}.txt",
"trim/{sample}.fasta.ckp",
"trim/{sample}.fasta.log",
"trim/{sample}.fasta.out",
"trim/{sample}.fasta.topos",
"trim/{sample}.fasta.tree"
shell:
"modeltest-ng -i {input.trim} -t ml > {output}"
rule obtain_best_AICc_model:
input:
data="trim/{sample}.txt"
output:
"trim/{sample}Model.txt"
shell:
"grep 'Best model according to AICc' -A 2 {input.data} | tail -n 1 | sed 's/^.* //' > {output}"
rule add_seq_length_to_model:
input:
trim=expand("trim/{sample}Model.txt", sample=sample_names)
output:
prim="tree/prim.part"
shell:
"python3 python/sequence_model_processor.py ./trim {output.prim}"
rule Concatenated:
input:
prim="tree/prim.part"
output:
align="tree/concatenated.fasta"
run:
concatenate_fastas.concatenate_fastas("./trim",output.align)
rule maximum_likelihood_tree_step_1:
input:
model="tree/prim.part",
msa="tree/concatenated.fasta"
output:
"tree/concatenated.fasta.raxml.bestTree",
"tree/concatenated.fasta.raxml.bestModel",
"tree/concatenated.fasta.raxml.mlTrees",
"tree/concatenated.fasta.raxml.rba",
"tree/concatenated.fasta.raxml.startTree"
params:
threads=multiprocessing.cpu_count(),
outgroup="Macrobiotus_rybaki"
shell:
"""
raxml-ng --msa {input.msa} --model {input.model} --threads {params.threads} --seed 27 --tree pars{{100}},rand{{100}} --force perf_threads --outgroup {params.outgroup}
"""
rule maximum_likelihood_tree_step_2:
input:
model="tree/prim.part",
msa="tree/concatenated.fasta"
output:
"tree/concatenated.fasta.raxml.bootstraps",
"tree/concatenated.fasta.raxml.log",
"tree/concatenated.fasta.raxml.rba"
params:
threads=multiprocessing.cpu_count(),
bootstrap_trees=10000000,
outgroup="Macrobiotus_rybaki"
shell:
"""
raxml-ng --bootstrap --msa {input.msa} --model {input.model} --threads {params.threads} --seed 27 --bs-trees autoMRE{{{params.bootstrap_trees}}} --force perf_threads --outgroup {params.outgroup}
"""
rule maximum_likelihood_tree_step_3:
input:
tree="tree/concatenated.fasta.raxml.bestTree",
bootstraps="tree/concatenated.fasta.raxml.bootstraps"
output:
"tree/concatenated.fasta.raxml.bestTree.raxml.support",
"tree/concatenated.fasta.raxml.bestTree.raxml.log"
params:
threads=multiprocessing.cpu_count(),
shell:
"raxml-ng --support --tree {input.tree} --bs-trees {input.bootstraps} --threads {params.threads} --force perf_threads"
rule build_tree:
input:
tree="tree/concatenated.fasta.raxml.bestTree.raxml.support"
output:
img="Tree.svg"
run:
newick_tree_visualizer.tree_generator(input.tree, output.img)