-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added command : gotree extract mutations
- Loading branch information
1 parent
46057ab
commit 76f7e39
Showing
5 changed files
with
285 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
package cmd | ||
|
||
import ( | ||
"bufio" | ||
"fmt" | ||
goio "io" | ||
"os" | ||
|
||
"github.com/evolbioinfo/goalign/align" | ||
"github.com/evolbioinfo/goalign/io/fasta" | ||
"github.com/evolbioinfo/goalign/io/phylip" | ||
"github.com/evolbioinfo/gotree/io" | ||
"github.com/evolbioinfo/gotree/io/utils" | ||
"github.com/evolbioinfo/gotree/mutations" | ||
"github.com/evolbioinfo/gotree/tree" | ||
"github.com/spf13/cobra" | ||
) | ||
|
||
// Command-line options of the "mutations" command.
var (
	mutationsalign       string // Alignment input file
	mutationsphylip      bool   // Alignment is in phylip format instead of fasta
	mutationsinputstrict bool   // Strict phylip input format (only used with phylip input)
	outfile              string // Output file
)
|
||
// mutationsCmd represents the mutations command | ||
var mutationsCmd = &cobra.Command{ | ||
Use: "mutations", | ||
Short: "Extract the list of mutations along the branches of the phylogeny.", | ||
Long: `Extract the list of mutations along the branches of the phylogeny, given | ||
the full list of ancestral (and terminal) sequences. | ||
The input tree must have internal node names specified and must be rooted. | ||
The input alignment (fasta or phylip only) must specify one sequence per internal | ||
node name and tip. | ||
The output consists of the list of mutations that appear along the branches of the | ||
tree, tab separated text file: | ||
1. Tree index (useful if several trees in the input tree file) | ||
2. Alignment site index | ||
3. Branch index | ||
4. Child node name | ||
5. Parent character | ||
6. Child character | ||
7. Number of descendent tips | ||
8. Number of descendent tips that have the child character | ||
`, | ||
RunE: func(cmd *cobra.Command, args []string) (err error) { | ||
var align align.Alignment | ||
var fi goio.Closer | ||
var r *bufio.Reader | ||
var treefile goio.Closer | ||
var treechan <-chan tree.Trees | ||
var f *os.File | ||
var muts *mutations.MutationList | ||
|
||
// Reading the alignment | ||
if fi, r, err = utils.GetReader(mutationsalign); err != nil { | ||
io.LogError(err) | ||
return | ||
} | ||
if mutationsphylip { | ||
if align, err = phylip.NewParser(r, mutationsinputstrict).Parse(); err != nil { | ||
io.LogError(err) | ||
return | ||
} | ||
} else { | ||
if align, err = fasta.NewParser(r).Parse(); err != nil { | ||
io.LogError(err) | ||
return | ||
} | ||
} | ||
fi.Close() | ||
|
||
// Reading the trees | ||
if treefile, treechan, err = readTrees(intreefile); err != nil { | ||
io.LogError(err) | ||
return | ||
} | ||
defer treefile.Close() | ||
|
||
if f, err = openWriteFile(outfile); err != nil { | ||
io.LogError(err) | ||
return | ||
} | ||
defer closeWriteFile(f, outfile) | ||
|
||
fmt.Fprintf(f, "Tree ID\tSite\tBranch ID\tNode Name\tParent Character\tChild Character\tTotal tips\tSame Character Tips\n") | ||
|
||
for t := range treechan { | ||
if muts, err = mutations.CountMutations(t.Tree, align); err != nil { | ||
io.LogError(err) | ||
return | ||
} | ||
for _, m := range muts.Mutations { | ||
fmt.Fprintf(f, "%d\t%d\t%d\t%s\t%c\t%c\t%d\t%d\n", t.Id, m.AlignmentSite, m.BranchIndex, m.ChildNodeName, m.ParentCharacter, m.ChildCharacter, m.NumTips, m.NumTipsWithChildCharacter) | ||
} | ||
} | ||
return | ||
}, | ||
} | ||
|
||
func init() { | ||
computeCmd.AddCommand(mutationsCmd) | ||
mutationsCmd.PersistentFlags().StringVarP(&mutationsalign, "align", "a", "stdin", "Alignment input file") | ||
mutationsCmd.PersistentFlags().BoolVarP(&mutationsphylip, "phylip", "p", false, "Alignment is in phylip? default : false (Fasta)") | ||
mutationsCmd.PersistentFlags().BoolVar(&mutationsinputstrict, "input-strict", false, "Strict phylip input format (only used with -p)") | ||
mutationsCmd.PersistentFlags().StringVarP(&intreefile, "input", "i", "stdin", "Input tree") | ||
mutationsCmd.PersistentFlags().StringVarP(&outfile, "output", "o", "stdout", "Output file") | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
package mutations | ||
|
||
import ( | ||
"fmt" | ||
|
||
"github.com/evolbioinfo/goalign/align" | ||
"github.com/evolbioinfo/gotree/io" | ||
"github.com/evolbioinfo/gotree/tree" | ||
) | ||
|
||
func CountMutations(t *tree.Tree, a align.Alignment) (mutations *MutationList, err error) { | ||
var sitemutations *MutationList | ||
mutations = NewMutationList() | ||
|
||
// We set branch ids | ||
nbranches := 0 | ||
for _, e := range t.Edges() { | ||
e.SetId(nbranches) | ||
nbranches++ | ||
} | ||
|
||
// We set nodes ids identical to index in alignment | ||
// and we check that sequences correspond to all tree nodes | ||
// and all tree nodes have a name | ||
for _, n := range t.Nodes() { | ||
var i int | ||
|
||
if n.Name() == "" { | ||
err = fmt.Errorf("all nodes of the phylogeny must have a name") | ||
io.LogError(err) | ||
return | ||
} | ||
if i = a.GetSequenceIdByName(n.Name()); i < 0 { | ||
err = fmt.Errorf("node %s of the phylogeny does not have an associated sequence in the alignment", n.Name()) | ||
io.LogError(err) | ||
return | ||
} | ||
n.SetId(i) | ||
} | ||
|
||
// We iterate over alignment sites | ||
for i := 0; i < a.Length(); i++ { | ||
if sitemutations, err = countMutationsSite(t, a, i); err != nil { | ||
io.LogError(err) | ||
return | ||
} | ||
if err = mutations.Append(sitemutations); err != nil { | ||
io.LogError(err) | ||
return | ||
} | ||
} | ||
|
||
return | ||
} | ||
|
||
func countMutationsSite(t *tree.Tree, a align.Alignment, site int) (mutations *MutationList, err error) { | ||
mutations, _, _, err = countMutationSiteBranch(t, nil, t.Root(), nil, a, site) | ||
return | ||
} | ||
|
||
func countMutationSiteBranch(t *tree.Tree, prevNode *tree.Node, currentNode *tree.Node, currentBranch *tree.Edge, a align.Alignment, site int) (mutations *MutationList, ntips int, characterDistribution map[uint8]int, err error) { | ||
var tmpMutations *MutationList | ||
var tmpntips, nidtips int | ||
var tmpCharacterDistribution map[uint8]int | ||
var nextNode *tree.Node | ||
var nextIndex int | ||
var prevChar, curChar uint8 | ||
var prevSeq, curSeq []uint8 | ||
|
||
mutations = NewMutationList() | ||
characterDistribution = make(map[uint8]int) | ||
|
||
curSeq, _ = a.GetSequenceCharById(currentNode.Id()) | ||
curChar = curSeq[site] | ||
|
||
if currentNode.Tip() { | ||
ntips = 1 | ||
characterDistribution[curChar] = 1 | ||
} else { | ||
for nextIndex, nextNode = range currentNode.Neigh() { | ||
if nextNode != prevNode { | ||
if tmpMutations, tmpntips, tmpCharacterDistribution, err = countMutationSiteBranch(t, currentNode, nextNode, currentNode.Edges()[nextIndex], a, site); err != nil { | ||
return | ||
} | ||
for char, nb := range tmpCharacterDistribution { | ||
if _, exist := characterDistribution[char]; !exist { | ||
characterDistribution[char] = nb | ||
} else { | ||
characterDistribution[char] += nb | ||
} | ||
} | ||
ntips += tmpntips | ||
if err = mutations.Append(tmpMutations); err != nil { | ||
return | ||
} | ||
} | ||
} | ||
} | ||
|
||
if prevNode != nil { | ||
prevSeq, _ = a.GetSequenceCharById(prevNode.Id()) | ||
prevChar = prevSeq[site] | ||
|
||
if n, exist := characterDistribution[curChar]; !exist { | ||
nidtips = 0 | ||
} else { | ||
nidtips = n | ||
} | ||
|
||
if prevChar != curChar { | ||
k := fmt.Sprintf("%d-%d-%c-%c", site, currentBranch.Id(), rune(prevChar), rune(curChar)) | ||
m := Mutation{ | ||
AlignmentSite: site, | ||
BranchIndex: currentBranch.Id(), | ||
ChildNodeName: currentNode.Name(), | ||
ParentCharacter: prevChar, | ||
ChildCharacter: curChar, | ||
NumTips: ntips, | ||
NumTipsWithChildCharacter: nidtips, | ||
} | ||
mutations.Mutations[k] = m | ||
} | ||
} | ||
return | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
// Package mutations provides data structures and functions | ||
// for counting mutations given an alignment of ancestral and tip sequences. | ||
package mutations | ||
|
||
import ( | ||
"fmt" | ||
|
||
"github.com/evolbioinfo/gotree/io" | ||
) | ||
|
||
// Mutation describes a character change at one alignment site along one
// branch of the tree.
type Mutation struct {
	// AlignmentSite is the index of the site in the alignment.
	AlignmentSite int
	// BranchIndex is the index of the branch carrying the mutation.
	BranchIndex int
	// ChildNodeName is the name of the node at the child end of the branch,
	// i.e. the node at the top of the descendant clade.
	ChildNodeName string
	// ParentCharacter is the character at the parent end of the branch.
	ParentCharacter uint8
	// ChildCharacter is the character at the child end of the branch.
	ChildCharacter uint8
	// NumTips is the total number of descendent tips.
	NumTips int
	// NumTipsWithChildCharacter is the number of descendent tips that have
	// the child character.
	NumTipsWithChildCharacter int
}
|
||
type MutationList struct { | ||
Mutations map[string]Mutation // Key: "AlignmentSite-BranchIndex-ParentCharacter-ChildCharacter" | ||
} | ||
|
||
func NewMutationList() (mutations *MutationList) { | ||
mutations = &MutationList{ | ||
Mutations: make(map[string]Mutation), | ||
} | ||
return | ||
} | ||
|
||
func (m *MutationList) Append(mapp *MutationList) (err error) { | ||
var exist bool | ||
for k, v := range mapp.Mutations { | ||
if _, exist = m.Mutations[k]; exist { | ||
err = fmt.Errorf("mutation %s already exist in the list", k) | ||
io.LogError(err) | ||
return | ||
} | ||
m.Mutations[k] = v | ||
} | ||
return | ||
} |