diff --git a/cmd/collapsebrname.go b/cmd/collapsebrname.go
new file mode 100644
index 0000000..d963cff
--- /dev/null
+++ b/cmd/collapsebrname.go
@@ -0,0 +1,109 @@
+package cmd
+
+import (
+	"fmt"
+	goio "io"
+	"os"
+
+	"github.com/evolbioinfo/gotree/io"
+	"github.com/evolbioinfo/gotree/tree"
+	"github.com/spf13/cobra"
+)
+
+var brnamefile string
+var brid bool
+
+// collapsebrnameCmd represents the "collapse name" command: it collapses the
+// branches whose names (or ids, with --id) are listed in the file given by -b.
+var collapsebrnameCmd = &cobra.Command{
+	Use:   "name",
+	Short: "Collapse branches having given name or ID",
+	Long: `Collapse branches having given name or ID.
+
+	Names (or ID) are defined in an input file (-b)
+
+	If an external branch name/id is given, then does not do anything.
+`,
+	RunE: func(cmd *cobra.Command, args []string) (err error) {
+		var f *os.File
+		var treefile goio.Closer
+		var treechan <-chan tree.Trees
+		var brnames []string
+		var brids []int
+
+		if f, err = openWriteFile(outtreefile); err != nil {
+			io.LogError(err)
+			return
+		}
+		defer closeWriteFile(f, outtreefile)
+
+		if treefile, treechan, err = readTrees(intreefile); err != nil {
+			io.LogError(err)
+			return
+		}
+		defer treefile.Close()
+
+		if brid {
+
+			if brids, err = parseIntFile(brnamefile); err != nil {
+				io.LogError(err)
+				return
+			}
+		} else {
+			if brnames, err = parseStringFile(brnamefile); err != nil {
+				io.LogError(err)
+				return
+			}
+		}
+
+		for t := range treechan {
+			if t.Err != nil {
+				io.LogError(t.Err)
+				return t.Err
+			}
+			t.Tree.ReinitIndexes()
+			alledges := t.Tree.Edges()
+			// Edges belong to the current tree only: start from an empty list for
+			// each tree so edges of a previous tree are never passed to RemoveEdges.
+			var toremove []*tree.Edge
+			if brid {
+				for _, i := range brids {
+					if i < 0 || i >= len(alledges) {
+						err = fmt.Errorf("branch index %d is not in the tree (<0 or >=#branches)", i)
+						io.LogError(err)
+						return
+					}
+					toremove = append(toremove, alledges[i])
+				}
+			} else {
+				for _, n := range brnames {
+					found := false
+					for _, e := range alledges {
+						if e.Name(t.Tree.Rooted()) == n {
+							toremove = append(toremove, e)
+							found = true
+						}
+					}
+					if !found {
+						err = fmt.Errorf("branch name %s not found in the tree", n)
+						io.LogError(err)
+						return
+					}
+				}
+			}
+			t.Tree.RemoveEdges(true, false, toremove...)
+			if _, err = f.WriteString(t.Tree.Newick() + "\n"); err != nil {
+				io.LogError(err)
+				return
+			}
+		}
+		return
+	},
+}
+
+// init registers the "name" subcommand under collapseCmd and declares its flags.
+func init() {
+	collapseCmd.AddCommand(collapsebrnameCmd)
+	collapsebrnameCmd.Flags().StringVarP(&brnamefile, "brfile", "b", "none", "File with one branch name/id per line")
+	collapsebrnameCmd.Flags().BoolVar(&brid, "id", false, "Input file contains branch ids (otherwise, branch names)")
+}
diff --git a/cmd/root.go b/cmd/root.go
index cefbb71..da035c9 100644
--- a/cmd/root.go
+++ b/cmd/root.go
@@ -178,28 +178,68 @@ func readTree(infile string) (t *tree.Tree, err error) {
 	return
 }
 
-func parseTipsFile(file string) (tips []string, err error) {
+// parseStringFile reads the given file and returns its entries as strings.
+// Each line is split on "," so a line may hold several entries; entries are
+// returned verbatim (no trimming). Only the error from opening the file is
+// reported: reading stops silently at the first error returned by Readln.
+func parseStringFile(file string) (s []string, err error) {
+	var ifilereader *bufio.Reader
+	var ifile goio.Closer
+	var line string
+	var err2 error
 
-	var treereader *bufio.Reader
-	var treefile goio.Closer
+	s = make([]string, 0, 100)
+
+	if ifile, ifilereader, err = utils.GetReader(file); err == nil {
+		defer ifile.Close()
+		line, err2 = Readln(ifilereader)
+		for err2 == nil {
+			for _, name := range strings.Split(line, ",") {
+				s = append(s, name)
+			}
+			line, err2 = Readln(ifilereader)
+		}
+	}
+	return
+}
+
+// parseIntFile reads the given file and returns its entries as ints. Each line
+// is split on "," and every entry is trimmed of surrounding white space before
+// being converted with strconv.Atoi; the first entry that is not an integer
+// aborts parsing and its conversion error is returned.
+func parseIntFile(file string) (islice []int, err error) {
+	var ifilereader *bufio.Reader
+	var ifile goio.Closer
 	var line string
 	var err2 error
+	var tempi int
 
-	tips = make([]string, 0, 100)
+	islice = make([]int, 0, 100)
 
-	if treefile, treereader, err = utils.GetReader(file); err == nil {
-		line, err2 = Readln(treereader)
+	if ifile, ifilereader, err = utils.GetReader(file); err == nil {
+		// Deferred so the file is also closed on the early return below.
+		defer ifile.Close()
+		line, err2 = Readln(ifilereader)
 		for err2 == nil {
 			for _, name := range strings.Split(line, ",") {
-				tips = append(tips, name)
+				if tempi, err = strconv.Atoi(strings.TrimSpace(name)); err != nil {
+					return
+				}
+				islice = append(islice, tempi)
 			}
-			line, err2 = Readln(treereader)
+			line, err2 = Readln(ifilereader)
 		}
-		treefile.Close()
 	}
 	return
 }
 
+// parseTipsFile reads a file of tip names. It is a thin wrapper that delegates
+// to parseStringFile.
+func parseTipsFile(file string) (tips []string, err error) {
+	tips, err = parseStringFile(file)
+	return
+}
+
 func readMapFile(file string, revert bool) (map[string]string, error) {
 	outmap := make(map[string]string, 0)
 	var mapfile *os.File
diff --git a/test.sh b/test.sh
index 5af5417..bc5328a 100755
--- a/test.sh
+++ b/test.sh
@@ -315,6 +315,35 @@ ${GOTREE} generate yuletree --seed 10 | ${GOTREE} collapse depth -m 2 -M 2 | ${G
 diff -q -b result expected
 rm -f expected result
 
+# gotree collapse id
+echo "->gotree collapse id/name"
+cat > input < expected < expected2 < br < br2 < result
+diff -q -b result expected
+${GOTREE} collapse name -i input --id -b br2 > result
+diff -q -b result expected2
+rm -f input expected result br br2 expected2
+
 # gotree collapse single
 echo "->gotree collapse single"
 cat > test_input <