From 791415518f60c9ff329383266978102685105d11 Mon Sep 17 00:00:00 2001 From: Jaime Date: Sat, 29 Aug 2020 18:32:44 +0200 Subject: [PATCH 1/5] get_midpoint function does not return None when tree already rooted at midpoint. Returns first child instead --- ete3/coretype/tree.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ete3/coretype/tree.py b/ete3/coretype/tree.py index 26070648e..d0d3e93cd 100644 --- a/ete3/coretype/tree.py +++ b/ete3/coretype/tree.py @@ -1158,6 +1158,11 @@ def get_midpoint_outgroup(self): break else: current = current.up + + # if we reached the root, the tree is already at midpoint. Return any child as valid outgroup + if current is None: + current = self.children[0] + return current def populate(self, size, names_library=None, reuse_names=False, From 7597af36a2d9f36e8a041bda618bae86dd0776d9 Mon Sep 17 00:00:00 2001 From: Jaime Date: Sat, 29 Aug 2020 18:33:32 +0200 Subject: [PATCH 2/5] ncbi_query get_topoly does not break when some taxids are not found. --- ete3/ncbi_taxonomy/ncbiquery.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ete3/ncbi_taxonomy/ncbiquery.py b/ete3/ncbi_taxonomy/ncbiquery.py index 75287d2bd..f13c73119 100644 --- a/ete3/ncbi_taxonomy/ncbiquery.py +++ b/ete3/ncbi_taxonomy/ncbiquery.py @@ -542,9 +542,9 @@ def annotate_tree(self, t, taxid_attr="name", tax2name=None, tax2track=None, tax node_taxid = merged_conversion[node_taxid] n.add_features(sci_name = tax2name.get(node_taxid, getattr(n, taxid_attr, '')), common_name = tax2common_name.get(node_taxid, ''), - lineage = tax2track[node_taxid], + lineage = tax2track.get(node_taxid, []), rank = tax2rank.get(node_taxid, 'Unknown'), - named_lineage = [tax2name.get(tax, str(tax)) for tax in tax2track[node_taxid]]) + named_lineage = [tax2name.get(tax, str(tax)) for tax in tax2track.get(node_taxid, [])]) elif n.is_leaf(): n.add_features(sci_name = getattr(n, taxid_attr, 'NA'), common_name = '', From 7a1f0c596cc5cf2eb709da2ed991cbeb1d79250f Mon Sep 17 
00:00:00 2001 From: Jaime Date: Sat, 29 Aug 2020 19:25:39 +0200 Subject: [PATCH 3/5] Fixes #469 NCBI db update crashes due to nocase duplicate synonyms --- ete3/ncbi_taxonomy/ncbiquery.py | 49 ++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/ete3/ncbi_taxonomy/ncbiquery.py b/ete3/ncbi_taxonomy/ncbiquery.py index f13c73119..77136ae82 100644 --- a/ete3/ncbi_taxonomy/ncbiquery.py +++ b/ete3/ncbi_taxonomy/ncbiquery.py @@ -219,7 +219,7 @@ def get_lineage_translator(self, taxids): return id2lineages - + def get_lineage(self, taxid): """Given a valid taxid number, return its corresponding lineage track as a hierarchically sorted list of parent taxids. @@ -241,7 +241,7 @@ def get_lineage(self, taxid): raise ValueError("%s taxid not found" %taxid) else: warnings.warn("taxid %s was translated into %s" %(taxid, merged_conversion[taxid])) - + track = list(map(int, raw_track[0].split(","))) return list(reversed(track)) @@ -326,7 +326,7 @@ def translate_to_names(self, taxids): for sp in taxids: names.append(id2name.get(sp, sp)) return names - + def get_descendant_taxa(self, parent, intermediate_nodes=False, rank_limit=None, collapse_subspecies=False, return_tree=False): """ @@ -342,11 +342,11 @@ def get_descendant_taxa(self, parent, intermediate_nodes=False, rank_limit=None, except KeyError: raise ValueError('%s not found!' %parent) - # checks if taxid is a deprecated one, and converts into the right one. + # checks if taxid is a deprecated one, and converts into the right one. 
_, conversion = self._translate_merged([taxid]) #try to find taxid in synonyms table - if conversion: + if conversion: taxid = conversion[taxid] - + with open(self.dbfile+".traverse.pkl", "rb") as CACHED_TRAVERSE: prepostorder = pickle.load(CACHED_TRAVERSE) descendants = {} @@ -358,12 +358,12 @@ def get_descendant_taxa(self, parent, intermediate_nodes=False, rank_limit=None, descendants[tid] = descendants.get(tid, 0) + 1 elif found == 2: break - + if not found: raise ValueError("taxid not found:%s" %taxid) elif found == 1: - return [taxid] - + return [taxid] + if rank_limit or collapse_subspecies or return_tree: tree = self.get_topology(list(descendants.keys()), intermediate_nodes=intermediate_nodes, collapse_subspecies=collapse_subspecies, rank_limit=rank_limit) if return_tree: @@ -372,7 +372,7 @@ def get_descendant_taxa(self, parent, intermediate_nodes=False, rank_limit=None, return list(map(int, [n.name for n in tree.get_descendants()])) else: return map(int, [n.name for n in tree]) - + elif intermediate_nodes: return [tid for tid, count in six.iteritems(descendants)] else: @@ -398,7 +398,7 @@ def get_topology(self, taxids, intermediate_nodes=False, rank_limit=None, collap """ from .. 
import PhyloTree - taxids, merged_conversion = self._translate_merged(taxids) + taxids, merged_conversion = self._translate_merged(taxids) if len(taxids) == 1: root_taxid = int(list(taxids)[0]) with open(self.dbfile+".traverse.pkl", "rb") as CACHED_TRAVERSE: @@ -407,7 +407,7 @@ def get_topology(self, taxids, intermediate_nodes=False, rank_limit=None, collap found = 0 nodes = {} hit = 0 - visited = set() + visited = set() start = prepostorder.index(root_taxid) try: end = prepostorder.index(root_taxid, start+1) @@ -435,7 +435,7 @@ def get_topology(self, taxids, intermediate_nodes=False, rank_limit=None, collap id2lineage = self.get_lineage_translator(taxids) all_taxids = set() for lineage in id2lineage.values(): - all_taxids.update(lineage) + all_taxids.update(lineage) id2rank = self.get_rank(all_taxids) for sp in taxids: track = [] @@ -493,7 +493,7 @@ def annotate_tree(self, t, taxid_attr="name", tax2name=None, tax2track=None, tax :param t: a Tree (or Tree derived) instance. - :param name taxid_attr: Allows to set a custom node attribute + :param name taxid_attr: Allows to set a custom node attribute containing the taxid number associated to each node (i.e. species in PhyloTree instances). 
@@ -513,7 +513,7 @@ def annotate_tree(self, t, taxid_attr="name", tax2name=None, tax2track=None, tax merged_conversion = {} taxids, merged_conversion = self._translate_merged(taxids) - + if not tax2name or taxids - set(map(int, list(tax2name.keys()))): tax2name = self.get_taxid_translator(taxids) if not tax2track or taxids - set(map(int, list(tax2track.keys()))): @@ -543,7 +543,7 @@ def annotate_tree(self, t, taxid_attr="name", tax2name=None, tax2track=None, tax n.add_features(sci_name = tax2name.get(node_taxid, getattr(n, taxid_attr, '')), common_name = tax2common_name.get(node_taxid, ''), lineage = tax2track.get(node_taxid, []), - rank = tax2rank.get(node_taxid, 'Unknown'), + rank = tax2rank.get(node_taxid, 'Unknown'), named_lineage = [tax2name.get(tax, str(tax)) for tax in tax2track.get(node_taxid, [])]) elif n.is_leaf(): n.add_features(sci_name = getattr(n, taxid_attr, 'NA'), @@ -674,19 +674,30 @@ def load_ncbi_tree_from_dump(tar): name2rank = {} node2common = {} print("Loading node names...") + unique_nocase_synonyms = set() for line in tar.extractfile("names.dmp"): line = str(line.decode()) fields = [_f.strip() for _f in line.split("|")] nodename = fields[0] name_type = fields[3].lower() taxname = fields[1] + + # Clean up tax names so we make sure they don't include quotes. See https://github.com/etetoolkit/ete/issues/469 + taxname = taxname.rstrip('"').lstrip('"') + if name_type == "scientific name": node2taxname[nodename] = taxname if name_type == "genbank common name": node2common[nodename] = taxname elif name_type in set(["synonym", "equivalent name", "genbank equivalent name", "anamorph", "genbank synonym", "genbank anamorph", "teleomorph"]): - synonyms.add( (nodename, taxname) ) + + # Keep track of synonyms, but ignore duplicate case-insensitive names. 
See https://github.com/etetoolkit/ete/issues/469 + synonym_key = (nodename, taxname.lower()) + if synonym_key not in unique_nocase_synonyms: + unique_nocase_synonyms.add(synonym_key) + synonyms.add((nodename, taxname)) + print(len(node2taxname), "names loaded.") print(len(synonyms), "synonyms loaded.") @@ -749,7 +760,7 @@ def update_db(dbfile, targz_file=None): md5_check = md5_file.readline().split()[0] targz_file = "taxdump.tar.gz" do_download = False - + if os.path.exists("taxdump.tar.gz"): local_md5 = md5(open("taxdump.tar.gz", "rb").read()).hexdigest() if local_md5 != md5_check: @@ -786,7 +797,7 @@ def update_db(dbfile, targz_file=None): raise else: os.system("rm syn.tab merged.tab taxa.tab") - # remove only downloaded taxdump file + # remove only downloaded taxdump file if not targz_file: os.system("rm taxdump.tar.gz") From 664cc85a0a69b6a8f943556dcbfd086a409bc713 Mon Sep 17 00:00:00 2001 From: Jaime Date: Sat, 29 Aug 2020 20:04:14 +0200 Subject: [PATCH 4/5] update unit tests to cope with NCBI changes. disable skbio test temporarily --- ete3/ncbi_taxonomy/ncbiquery.py | 3 ++- ete3/test/test_interop.py | 10 +++++++++- ete3/test/test_ncbiquery.py | 6 +++--- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/ete3/ncbi_taxonomy/ncbiquery.py b/ete3/ncbi_taxonomy/ncbiquery.py index 77136ae82..a9729bdbc 100644 --- a/ete3/ncbi_taxonomy/ncbiquery.py +++ b/ete3/ncbi_taxonomy/ncbiquery.py @@ -784,7 +784,8 @@ def update_db(dbfile, targz_file=None): print("Updating database: %s ..." 
%dbfile) generate_table(t) - open("syn.tab", "w").write('\n'.join(["%s\t%s" %(v[0],v[1]) for v in synonyms])) + with open("syn.tab", "w") as SYN: + SYN.write('\n'.join(["%s\t%s" %(v[0],v[1]) for v in synonyms])) with open("merged.tab", "w") as merged: for line in tar.extractfile("merged.dmp"): diff --git a/ete3/test/test_interop.py b/ete3/test/test_interop.py index fe4ebe904..0976d8a8d 100644 --- a/ete3/test/test_interop.py +++ b/ete3/test/test_interop.py @@ -10,7 +10,15 @@ def test_parent_child_table(self): newick = tree.write(format_root_node=True, format=1) self.assertEqual(newick, "(B:0.1,(D:1,E:1.5)C:0.2)A:1;") - def test_skbio(self): + + # Disabled temporarily. following error is reported: + # + # File "/home/travis/build/etetoolkit/ete/test_tmp/miniconda3/envs/test_3.5/lib/python3.5/site-packages/parso/__init__.py", line 41, in + # from parso.parser import ParserSyntaxError + # File "/home/travis/build/etetoolkit/ete/test_tmp/miniconda3/envs/test_3.5/lib/python3.5/site-packages/parso/parser.py", line 113 + # node_map: Dict[str, type] = {} + # SyntaxError: invalid syntax + def disabled_test_skbio(self): from skbio import TreeNode skb_tree = TreeNode.read([u"(B:0.1,(D:1,E:1.5)C:0.2)A:1;"]) for node in skb_tree.traverse(): diff --git a/ete3/test/test_ncbiquery.py b/ete3/test/test_ncbiquery.py index dbe631fa3..15e64ec1a 100644 --- a/ete3/test/test_ncbiquery.py +++ b/ete3/test/test_ncbiquery.py @@ -64,9 +64,9 @@ def test_ncbiquery(self): #Out[10]: [63221, 741158, 2665953, 1425170] self.assertEqual(set(out), set([63221, 741158, 2665953, 1425170])) - out = ncbi.get_descendant_taxa("9605", intermediate_nodes=False, rank_limit="species") - #Out[11]: [9606, 1425170] - self.assertEqual(set(out), set([9606, 1425170])) + out = ncbi.get_descendant_taxa("9596", intermediate_nodes=False, rank_limit="species") + #Out[11]: [9597, 9598] + self.assertEqual(set(out), set([9597, 9598])) def test_get_topology(self): ncbi = NCBITaxa(dbfile=DATABASE_PATH) From 
97e0ce9fab2f2286c69fb31577df75684613e769 Mon Sep 17 00:00:00 2001 From: Jaime Date: Sun, 30 Aug 2020 08:47:17 +0200 Subject: [PATCH 5/5] release 3.1.2 --- VERSION | 2 +- ete3/treeview/layouts.py | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/VERSION b/VERSION index ef538c281..6ebad1488 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -3.1.2 +3.1.2 \ No newline at end of file diff --git a/ete3/treeview/layouts.py b/ete3/treeview/layouts.py index 2c1c4e866..a319e1c03 100644 --- a/ete3/treeview/layouts.py +++ b/ete3/treeview/layouts.py @@ -45,13 +45,14 @@ def basic(node): if node.is_leaf(): - #node.img_style["size"]=1 - #node.img_style["shape"] = "circle" + node.img_style["size"]=2 + node.img_style["shape"] = "square" faces.add_face_to_node(faces.AttrFace("name","Arial",10,"#4f8f0f",None), node, 0 ) def phylogeny(node): leaf_color = "#000000" - node.img_style["shape"] = "circle" + node.img_style["shape"] = "square" + node.img_style["size"] = 2 if hasattr(node,"evoltype"): if node.evoltype == 'D': node.img_style["fgcolor"] = "#FF0000" @@ -71,14 +72,14 @@ def phylogeny(node): if node.is_leaf(): node.img_style["shape"] = "square" - node.img_style["size"] = 4 + node.img_style["size"] = 2 node.img_style["fgcolor"] = leaf_color faces.add_face_to_node( faces.AttrFace("name","Arial",11,leaf_color,None), node, 0 ) if hasattr(node,"sequence"): SequenceFace = faces.SequenceFace(node.sequence,"aa",13) faces.add_face_to_node(SequenceFace, node, 1, aligned=True) else: - node.img_style["size"] = 6 + node.img_style["size"] = 2 def heatmap(node): square_size = 10