-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathpre_BEAST_prep.sh
executable file
·70 lines (51 loc) · 2.46 KB
/
pre_BEAST_prep.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/bin/bash
###Preparing alignments and finding best-fit nucleotide sequence evolution models
path=/home/sobczm/popgen/phylogenetics
scripts=/home/sobczm/bin/popgen/phylogenetics
##MAFFT (align sequences)
cd $path
mkdir busco_alignments
mv BUSCO*.fasta $path/busco_alignments
cd $path/busco_alignments
qsub $scripts/sub_mafft_alignment.sh
## For closely related organisms (same species etc.): identify genes with high nucleotide diversity (Pi) and average number of pairwise differences, medium number of segregating sites
## (avoid alignments with low homology and lots of phylogenetically uninformative singletons).
#For analyses involving cross-species comparisons involving highly diverged sequences with high nucleotide diversity
# (e.g. 0.1<Pi<0.4), looking for genes with the lowest number of segregating sites.
python $scripts/calculate_nucleotide_diversity.py "*aligned.fasta"
mkdir -p $path/beast_runs/results
mv sequence_stats.txt excel_stats.txt $path/beast_runs/results
## Copy FASTA files of the candidate genes for the phylogeny into
mkdir -p $path/beast_runs/candidates
## Visually inspect the alignments of select genes (genes_selected_for_phylogeny.txt) to be used in
## constructing the phylogenies and trim them as necessary in MEGA7.
## Copy the relevant trimmed alignment FASTA files into
mkdir $path/beast_runs/candidates/select/trimmed
##PartitionFinder (nucleotide sequence evolution model)
config_template=/home/sobczm/bin/PartitionFinder1.1.1/partition_finder.cfg
ct=$(basename "$config_template")
cd $path/beast_runs/candidates/select/trimmed
mkdir NEXUS
# prepare directory for PartitionFinder run:
for f in *fasta
do
c="$(cat $f | awk 'NR%2==0' | awk '{print length($1)}' | head -1)"
p="${f%.fasta}.phy"
n="${f%.fasta}.NEXUS"
dir="${f%.fasta}"
mkdir $dir
cp $config_template $dir
# Substitute the name of the alignment file and the sequence length in the config file to become correct for the current run.
sed -i 's,^\(alignment = \).*,\1'"$p;"',' $dir/$ct
sed -i 's,^\(Gene1_pos1 = \).*,\1'"1-$c\\\3;"',' $dir/$ct
sed -i 's,^\(Gene1_pos2 = \).*,\1'"2-$c\\\3;"',' $dir/$ct
sed -i 's,^\(Gene1_pos3 = \).*,\1'"3-$c\\\3;"',' $dir/$ct
# Convert FASTA to phylip for the Partition Finder run
$scripts/fasta2phylip.pl $f>$p
mv $p $dir
# Convert FASTA to NEXUS for the BEAST run
$scripts/Fasta2Nexus.pl $f>$n
mv $n NEXUS
#Problems running PartitionFinder on the cluster. May have to be run locally on your Mac or Windows machine.
qsub $scripts/sub_partition_finder.sh $dir
done