diff --git a/tools/ete/ete_species_tree_generator.py b/tools/ete/ete_species_tree_generator.py index c03e89ec..1d3ebd4d 100644 --- a/tools/ete/ete_species_tree_generator.py +++ b/tools/ete/ete_species_tree_generator.py @@ -6,7 +6,7 @@ parser = optparse.OptionParser() parser.add_option('-s', '--species', dest="input_species_filename", - help='Species list in text format one species in each line') + help='List of species names or taxids in text format, one species per line') parser.add_option('-d', '--database', dest="database", default=None, help='ETE sqlite data base to use (default: ~/.etetoolkit/taxa.sqlite)') parser.add_option('-o', '--output', dest="output", help='output file name (default: stdout)') @@ -19,26 +19,31 @@ parser.error("-s option must be specified, Species list in text format one species in each line") ncbi = NCBITaxa(dbfile=options.database) -with open(options.input_species_filename) as f: - species_name = [_.strip().replace('_', ' ') for _ in f.readlines()] - -name2taxid = ncbi.get_name_translator(species_name) -taxid = [name2taxid[_][0] for _ in species_name] - -tree = ncbi.get_topology(taxid) +# determine taxids and species names in the input file +names = set() +taxids = set() +with open(options.input_species_filename) as f: + for species in f: + species = species.strip().replace('_', ' ') + try: + taxids.add(int(species)) + except ValueError: + names.add(species) +# translate all species names to taxids +name2taxid = ncbi.get_name_translator(names) +taxids.update({name2taxid[n][0] for n in names}) + +# get topology and set the scientific name as output +tree = ncbi.get_topology(taxids) +for isleaf, node in tree.iter_prepostorder(): + node.name = node.sci_name if options.treebest == "yes": - inv_map = {str(v[0]): k.replace(" ", "") + "*" for k, v in name2taxid.items()} -else: - inv_map = {str(v[0]): k for k, v in name2taxid.items()} - - -for leaf in tree: - leaf.name = inv_map[leaf.name] + for leaf in tree: + leaf.name = leaf.name.replace(" ", "") + "*" newickTree = tree.write(format=int(options.format)) - if options.treebest == "yes": newickTree = newickTree.rstrip(';') newickTree = newickTree + "root;" diff --git a/tools/ete/ete_species_tree_generator.xml b/tools/ete/ete_species_tree_generator.xml index 10d0bcc8..e2e5cd2f 100644 --- a/tools/ete/ete_species_tree_generator.xml +++ b/tools/ete/ete_species_tree_generator.xml @@ -1,4 +1,4 @@ - + from a list of species using the ETE Toolkit ete_macros.xml @@ -21,10 +21,10 @@ python '$__tool_directory__/ete_species_tree_generator.py' -t $output_format.treebest ]]> - + - + @@ -53,7 +53,9 @@ python '$__tool_directory__/ete_species_tree_generator.py' - + + + @@ -61,6 +63,20 @@ python '$__tool_directory__/ete_species_tree_generator.py' + + + + + + + + + + + + + +