Skip to content

Commit

Permalink
Added max-date to gotree prune date
Browse files Browse the repository at this point in the history
  • Loading branch information
fredericlemoine committed Sep 26, 2024
1 parent 69fcc18 commit de3a2f0
Show file tree
Hide file tree
Showing 2 changed files with 50 additions and 16 deletions.
12 changes: 11 additions & 1 deletion cmd/prunedate.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
)

var pruneMinDate float64
var pruneMaxDate float64

// pruneDateCmd represents the prune date command
var pruneDateCmd = &cobra.Command{
Expand Down Expand Up @@ -51,7 +52,15 @@ If max-date falls on an internal branch, we do not take this part of the tree, a
return
}
for _, t := range forest {
f.WriteString(t.Newick() + "\n")
if pruneMaxDate > 0 {
if err = t.CutTreeMaxDate(pruneMaxDate); err != nil {
io.LogError(err)
return
}
}
if len(t.Edges()) > 0 {
f.WriteString(t.Newick() + "\n")
}
}
}

Expand All @@ -64,4 +73,5 @@ func init() {
pruneDateCmd.PersistentFlags().StringVarP(&intreefile, "input", "i", "stdin", "Input tree(s) file")
pruneDateCmd.PersistentFlags().StringVarP(&outtreefile, "output", "o", "stdout", "Forest output file")
pruneDateCmd.PersistentFlags().Float64Var(&pruneMinDate, "min-date", 0, "Minimum date to cut the tree")
pruneDateCmd.PersistentFlags().Float64Var(&pruneMaxDate, "max-date", 0, "Maximum date to cut the tree (0=no max date)")
}
54 changes: 39 additions & 15 deletions tree/dates.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,30 +21,19 @@ type LTTData struct {
// If a node has no date or a malformed date, returns an error
func (t *Tree) NodeDates() (ndates []float64, err error) {
var date float64
var pattern *regexp.Regexp
var matches []string

ndates = make([]float64, 0)
pattern = regexp.MustCompile(`(?i)&date="(.+)"`)
nnodes := 0
t.PreOrder(func(cur *Node, prev *Node, e *Edge) (keep bool) {
keep = true
if cur.Id() != nnodes {
err = fmt.Errorf("node id does not correspond to postorder traversal: %d vs %d", cur.Id(), nnodes)
keep = false
} else if len(cur.Comments()) > 0 {
keep = false
for _, c := range cur.Comments() {
matches = pattern.FindStringSubmatch(c)
if len(matches) < 2 {
err = fmt.Errorf("no date found: %s", c)
} else if date, err = strconv.ParseFloat(matches[1], 64); err != nil {
err = fmt.Errorf("one of the node date is malformed: %s", c)
} else {
ndates = append(ndates, date)
err = nil
keep = true
}
if date, err = cur.date(); err != nil {
keep = false
} else {
ndates = append(ndates, date)
}
} else {
err = fmt.Errorf("a node with no date found")
Expand Down Expand Up @@ -221,3 +210,38 @@ func cutTreeMinDateRecur(cur, prev *Node, e *Edge, mindate float64, dates []floa

return
}

// nodeDateRegexp matches the `&date="..."` field of a newick comment
// (case-insensitive). It is compiled once at package level instead of on
// every call of date(). The capture stops at the first closing quote so that
// other quoted fields following the date in the same comment are not
// swallowed into the value.
var nodeDateRegexp = regexp.MustCompile(`(?i)&date="([^"]+)"`)

// date parses the date stored in the "&date=" field of the node's newick
// comments.
//
// Comments are scanned in order and the first one holding a parsable date
// wins. An error is returned (with a zero date) when the node has no comment
// at all, when no comment contains a date field, or when the date found
// cannot be parsed as a float; in the last two cases the error describes the
// last comment that was inspected.
func (n *Node) date() (date float64, err error) {
	comments := n.Comments()
	if len(comments) == 0 {
		// Without this guard the loop below never runs and the caller
		// would silently get date 0 with a nil error.
		return 0, fmt.Errorf("a node with no date found")
	}

	for _, c := range comments {
		matches := nodeDateRegexp.FindStringSubmatch(c)
		if len(matches) < 2 {
			err = fmt.Errorf("no date found: %s", c)
			continue
		}
		if date, err = strconv.ParseFloat(matches[1], 64); err != nil {
			err = fmt.Errorf("one of the node date is malformed: %s", c)
			continue
		}
		return date, nil
	}
	return 0, err
}

// CutTreeMaxDate walks over the tips of the tree and removes every tip whose
// date is strictly greater than maxdate; tips dated at or before maxdate are
// kept.
//
// The date of each tip is read with date(). If it cannot be obtained for a
// tip, the traversal stops and that error is returned; tips removed before
// that point stay removed.
func (t *Tree) CutTreeMaxDate(maxdate float64) (err error) {
	for _, tip := range t.Tips() {
		tipDate, dateErr := tip.date()
		if dateErr != nil {
			return dateErr
		}
		if tipDate > maxdate {
			t.removeTip(tip)
		}
	}
	return nil
}

0 comments on commit de3a2f0

Please sign in to comment.