-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path4.GetAnnFasta.sh
executable file
·60 lines (51 loc) · 1.96 KB
/
4.GetAnnFasta.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/bin/bash
#####################################
#### Extracts single fasta files from single genes.
#### Uses as input contigs from the previous step (3.GeneOrder).
####
#### Usage: bash 4.GetAnnFasta.sh Directory Prefix Output
#### - Directory = directory with all MITOS raw data folders
#### - Prefix = ID of the sample being processed
#### - Output = Path to the output folder of your choice (must be the same as in steps 1,2 and 3)
####
#### Dependencies: MtDNA.mitos_result2bedfile.py (in bin folder), MtDNA.Bed2fasta.py (in bin folder).
####
#### The script generates three outputs:
#### 1) A fasta file containing the complete sequences of all the annotated contigs (Prefix.mtDNA.All.contigs.fasta)
#### 2) A bed file summarizing the position of each gene (Prefix.mtDNA.All.contigs.bed)
#### 3) A fasta file containing the sequence of each gene found (Prefix.mtDNA.All.genes.fasta)
####
#### Francesco Cicconardi (2016)
#####################################
### Getting script arguments
# All three positional arguments are required (see the usage line in the header).
if [ "$#" -ne 3 ]; then
  echo "Usage: bash 4.GetAnnFasta.sh Directory Prefix Output" >&2
  exit 1
fi
MITODIR=$1
PREFIX=$2
OUTPUT=$3

# The script changes directory further down, so a relative Output path would
# silently point somewhere else once inside the MITOS folders. Resolve it to an
# absolute path up front. CDPATH is cleared so that cd prints nothing into the
# command substitution.
OUTPUT=$(CDPATH= cd -- "$OUTPUT" && pwd) || {
  echo "Error: cannot access output folder '$3'" >&2
  exit 1
}

### Important parameter
# List of contig IDs; only the text before the first ':' of each line is used.
CTG_ID=$OUTPUT/$PREFIX.All.contigs.ids
if [ ! -r "$CTG_ID" ]; then
  # Fail before touching any previous results.
  echo "Error: cannot read contig ID list '$CTG_ID'" >&2
  exit 1
fi

### Defining output files to be generated
CTG_FAS=$OUTPUT/$PREFIX.mtDNA.All.contigs.fasta
CTG_BED=$OUTPUT/$PREFIX.mtDNA.All.contigs.bed
GENE_FAS=$OUTPUT/$PREFIX.mtDNA.All.genes.fasta

### Removing old files
# -f: on a first run the files do not exist yet, which is not an error.
rm -f -- "$CTG_FAS" "$CTG_BED" "$GENE_FAS"
# Recreate them empty so they exist even when no contig is processed.
touch -- "$CTG_FAS" "$CTG_BED" "$GENE_FAS"

### Entering directory
cd -- "$MITODIR" || {
  echo "Error: cannot enter directory '$MITODIR'" >&2
  exit 1
}

### Keeping only the ID of the contigs and looping over them
cut -d ':' -f 1 "$CTG_ID" | while read -r contig
do
  # Blank lines in the ID list carry no contig; skip them.
  [ -n "$contig" ] || continue

  # A contig without its own folder is reported and skipped instead of running
  # the helpers in the wrong directory.
  if [ ! -d "$contig" ]; then
    echo "Warning: no folder for contig '$contig' in '$MITODIR', skipping" >&2
    continue
  fi

  # Work inside a subshell: the directory change is undone automatically when
  # the subshell ends, so one contig can never shift the working directory of
  # the contigs that follow.
  (
    cd -- "$contig" || exit 1
    echo "Processing $contig"

    echo "Conversion..."
    # Helper scripts are addressed relative to the contig folder, i.e. they
    # live in the 'bin' folder next to the MITOS directory.
    python ../../bin/MtDNA.mitos_result2bedfile.py result > "$contig.ann.bed" \
      || echo "Warning: bed conversion failed for $contig" >&2

    echo "Extract fasta seq..."
    python ../../bin/MtDNA.Bed2fasta.py -f sequence.fas -b "$contig.ann.bed" -o "$contig.genes.fasta" \
      || echo "Warning: fasta extraction failed for $contig" >&2

    printf 'Concatenate results...\n\n'
    # Append this contig's files to the sample-wide outputs.
    cat sequence.fas >> "$CTG_FAS"
    cat "$contig.ann.bed" >> "$CTG_BED"
    cat "$contig.genes.fasta" >> "$GENE_FAS"
  )
done