Bash scripts to make single-copy orthologue fasta files ready for phylogenomics from Broccoli output.
A bash tool which extracts a certain gene from an assembly using BLAST and bedtools.
# Replace the leading and trailing runs of alignment gaps ("-") in every
# sequence of a FASTA alignment with "?", leaving internal gaps untouched.
# Wrapped sequences are joined so each record is one header line followed by
# one sequence line.
# Arguments: FASTA file(s) to read; standard input when none are given.
# Outputs:   the edited alignment on stdout.
#
# Stage 1 (first awk) unwraps multi-line sequences.
# Stage 2 (second awk) masks the terminal gap runs of each sequence line.
mask_terminal_gaps() {
  awk '
    /^>/ {
      # Close the previous sequence line, but never emit a blank line ahead
      # of the very first record.
      if (started) printf "\n"
      started = 1
      print
      next
    }
    {
      if (length($0) > 0) started = 1
      printf "%s", $0
    }
    END {
      # Empty input yields empty output rather than a lone blank line.
      if (started) printf "\n"
    }
  ' "$@" |
    awk '
      # Headers may contain or end with "-"; pass them through unmodified.
      /^>/ { print; next }
      {
        total = length($0)
        lead = match($0, /^-+/) ? RLENGTH : 0
        # An all-gap line is already fully covered by the leading run, so
        # only look for a trailing run when something follows it.
        trail = (lead < total && match($0, /-+$/)) ? RLENGTH : 0
        head = substr($0, 1, lead)
        tail = substr($0, total - trail + 1)
        gsub(/-/, "?", head)
        gsub(/-/, "?", tail)
        print head substr($0, lead + 1, total - lead - trail) tail
      }
    '
}

mask_terminal_gaps file.fasta > file_edited.fasta
cat file.fasta
>test
-------------agtc-cgcatgaggatagctcgtagataaaa---------
>test2
-----atta--------------atttgacc--------tga-----------
>test3
ataaagctcggctaa-----------------------tggac----------
cat file_edited.fasta
>test
?????????????agtc-cgcatgaggatagctcgtagataaaa?????????
>test2
?????atta--------------atttgacc--------tga???????????
>test3
ataaagctcggctaa-----------------------tggac??????????
# Print each FASTA header followed by the total length of its sequence
# (the sum of the lengths of all lines up to the next header).
# Arguments: FASTA file(s) to read; standard input when none are given.
# Outputs:   alternating header / length lines on stdout.
fasta_seq_lengths() {
  awk '
    /^>/ {
      # Report the record that just ended. The in_record flag makes sure a
      # record with an empty sequence is still reported (as 0) instead of
      # being skipped, which would misalign headers and lengths.
      if (in_record || seqlen) print seqlen + 0
      print
      in_record = 1
      seqlen = 0
      next
    }
    {
      seqlen += length($0)
    }
    END {
      # Nothing to report for empty input.
      if (in_record || seqlen) print seqlen + 0
    }
  ' "$@"
}

fasta_seq_lengths file.fasta
>ptg000001
4551850
>ptg000002
10701577
>ptg000003
6461149
>ptg000004
6151846
>ptg000005
8702012
# Sort the records of a FASTA file by header (whole records are compared, so
# the header decides the order). Each record is prefixed with a NUL byte so
# that sort -z treats header plus sequence lines as one unit; the NULs are
# removed again afterwards. Ordering follows the current locale.
# Requires GNU sed (for the \x00 escape) and a sort that supports -z.
# Arguments: FASTA file(s) to read; standard input when none are given.
# Outputs:   the reordered records on stdout.
sort_fasta_by_header() {
  # awk re-emits every line with a terminating newline, so a file whose last
  # line lacks one cannot end up glued to the header that follows it after
  # sorting.
  awk '{ print }' "$@" |
    sed 's/^>/\x00&/' |
    sort -z |
    tr -d '\0'
}

sort_fasta_by_header file.fasta