-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcateg_ffindex_files.sh
53 lines (40 loc) · 2.13 KB
/
categ_ffindex_files.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/bin/bash
# Usage: categ_ffindex_files.sh <input_path> <output_path> [step]
#   $1  a path whose parent directory holds the "${SMPL}_<categ>_ids.txt"
#       files; only the directory part is used.
#   $2  a path whose parent directory receives the generated files; only the
#       directory part is used.
#   $3  optional step name selecting the cluster categories to process:
#         known_refinement -> kwp
#         unkn_refinement  -> eu
#         anything else    -> eu, gu, kwp and k

# Keep only the directory part of each path argument. The expansions are
# quoted so that paths containing whitespace or glob characters reach
# dirname as a single, unmodified word.
DIR=$(dirname -- "${1:-}")
OUTDIR=$(dirname -- "${2:-}")
STEP=${3:-}

# Sample prefix shared by every input and output file name.
SMPL="marine_hmp"

# External tools, and the HH-suite install whose data/ directory is passed
# to cstranslate.
MMSEQS=~/opt/MMseqs2/bin/mmseqs
FFINDEX=ffindex_apply
LIB=~/opt/hh-suite

# CATEG is a newline-separated list of category codes; it is meant to be
# iterated with an unquoted expansion so the shell splits it into words.
case "${STEP}" in
  known_refinement) CATEG="kwp" ;;
  unkn_refinement) CATEG="eu" ;;
  *) CATEG=$'eu\ngu\nkwp\nk' ;;
esac
# For every selected cluster category (e.g. "gu" = genomic unknowns), create
# a sub-database from the category's cluster ids/names, then derive from it
# the ORF set, the MSAs, the consensus sequences and the HMM profiles.

# Helper scripts are resolved relative to the current working directory.
scripts_dir="${PWD}/scripts/Cluster_categories_refinement"

# CATEG is intentionally unquoted: it is a whitespace-separated list of
# category codes that must be split into words.
for categ in $CATEG; do
  # Common prefix of every file written for this category.
  prefix="${OUTDIR}/${SMPL}_${categ}"

  "${MMSEQS}" createsubdb "${DIR}/${SMPL}_${categ}_ids.txt" \
    data/mmseqs_clustering/"${SMPL}"_clu_fa "${prefix}_clu"

  # Retrieve the set of ORFs for the category: strip the NUL bytes from the
  # sub-database and store the result as gzip-compressed FASTA.
  sed -e 's/\x0//g' "${prefix}_clu" | gzip > "${prefix}_cl_orfs.fasta.gz"

  # List the ORF names (FASTA headers without the leading '>'). Only the
  # compressed FASTA exists on disk, so it is read back through gzip -dc;
  # reading an uncompressed "_cl_orfs.fasta" would find no file and leave
  # the list empty.
  gzip -dc "${prefix}_cl_orfs.fasta.gz" \
    | grep '^>' \
    | sed 's/^>//' \
    | gzip > "${prefix}_cl_orfs.txt.gz"

  # Retrieve alignments, consensus sequences and HMMs.
  # Alignments: run famsa on every entry of the sub-database. The stderr
  # output of this step is discarded.
  "${FFINDEX}" "${prefix}_clu" "${prefix}_clu.index" \
    -i "${prefix}_aln.ffindex" -d "${prefix}_aln.ffdata" \
    -- famsa STDIN STDOUT 2> /dev/null

  # A3M files: apply reformat_file.sh to every alignment.
  "${FFINDEX}" "${prefix}_aln.ffdata" "${prefix}_aln.ffindex" \
    -i "${prefix}_a3m.ffindex" -d "${prefix}_a3m.ffdata" \
    -- "${scripts_dir}/reformat_file.sh"

  # Consensus sequences: apply consensus.sh to every alignment.
  "${FFINDEX}" "${prefix}_aln.ffdata" "${prefix}_aln.ffindex" \
    -i "${prefix}_cons.ffindex" -d "${prefix}_cons.ffdata" \
    -- "${scripts_dir}/consensus.sh"

  # cs219 database built from the alignment database.
  cstranslate -A "${LIB}"/data/cs219.lib -D "${LIB}"/data/context_data.lib \
    -x 0.3 -c 4 -f -i "${prefix}_aln" -o "${prefix}_cs219" -I fas -b

  # HMMs: apply hhmake.sh to every alignment.
  "${FFINDEX}" "${prefix}_aln.ffdata" "${prefix}_aln.ffindex" \
    -i "${prefix}_hmm.ffindex" -d "${prefix}_hmm.ffdata" \
    -- "${scripts_dir}/hhmake.sh"
done
echo "Done all categes of clusters"