-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathingest_fauna.smk
47 lines (44 loc) · 1.86 KB
/
ingest_fauna.smk
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
from pathlib import Path
def path_to_fauna(w):
    """Return the fauna location configured for this workflow.

    The signature accepts one positional argument so the function can be
    passed directly as a Snakemake ``params`` callable (as done in
    ``download_segment``); the argument itself is not used.

    Args:
        w: Wildcards object supplied by Snakemake; ignored.

    Returns:
        The value stored under ``config["path_to_fauna"]``.

    Raises:
        Exception: If the workflow config has no ``path_to_fauna`` key.
    """
    try:
        return config["path_to_fauna"]
    except KeyError:
        # `from None` suppresses the internal KeyError so the user only sees
        # the actionable configuration message, not a chained traceback.
        raise Exception("Your config must define 'path_to_fauna'") from None
rule download_segment:
    """
    Download avian-flu sequences for a single segment by running fauna's
    `vdb/download.py` against the `vdb` database, selecting records whose
    locus equals the `segment` wildcard.
    """
    output:
        sequences = "fauna/data/{segment}.fasta",
    params:
        # Space-separated list; intentionally expanded unquoted in the shell
        # command so each field name becomes its own argument.
        fasta_fields = "strain virus accession collection_date region country division location host domestic_status subtype originating_lab submitting_lab authors PMID gisaid_clade h5_clade",
        # download.py takes a directory and a file stem rather than a full
        # path, so both are derived from the declared output.
        # NOTE(review): presumably download.py appends ".fasta" to the stem,
        # which would match the declared output name -- confirm against fauna.
        output_dir = lambda wildcards, output: Path(output.sequences).parent,
        output_fstem = lambda wildcards, output: Path(output.sequences).stem,
        path_to_fauna = path_to_fauna
    benchmark:
        "fauna/benchmarks/download_segment_{segment}.txt"
    shell:
        """
        python3 {params.path_to_fauna:q}/vdb/download.py \
            --database vdb \
            --virus avian_flu \
            --fasta_fields {params.fasta_fields} \
            --select locus:{wildcards.segment:q} \
            --path {params.output_dir:q} \
            --fstem {params.output_fstem:q}
        """
rule parse_segment:
    """
    Run `augur parse` on the downloaded FASTA for one segment, writing a
    sequences FASTA and a metadata TSV.
    """
    input:
        sequences = "fauna/data/{segment}.fasta",
    output:
        sequences = "fauna/results/sequences_{segment}.fasta",
        metadata = "fauna/data/metadata_{segment}.tsv",
    params:
        # Same number and order of fields as requested in download_segment,
        # except that `accession` is named `isolate_id` and `collection_date`
        # is named `date` here.
        # Space-separated lists; intentionally expanded unquoted in the shell
        # command so each field name becomes its own argument.
        fasta_fields = "strain virus isolate_id date region country division location host domestic_status subtype originating_lab submitting_lab authors PMID gisaid_clade h5_clade",
        prettify_fields = "region country division location host originating_lab submitting_lab authors PMID"
    shell:
        """
        augur parse \
            --sequences {input.sequences:q} \
            --output-sequences {output.sequences:q} \
            --output-metadata {output.metadata:q} \
            --fields {params.fasta_fields} \
            --prettify-fields {params.prettify_fields}
        """