diff --git a/HISTORY.md b/HISTORY.md index 18c3396..71273fc 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -2,6 +2,17 @@ ## dev +**Deprecation** + +* `Taxonomy.from_json`, `load`, `Taxonomy.from_taxdump` and `load_ncbi`, deprecated since v2.4.0, have been removed. See the v2.4.0 notes for replacements + +**New features** + +* It is now possible to create a Newick tree string with `Taxonomy.toNewick()` +* It is now possible to clip Taxonomies with `Taxonomy.clip()`, which behaves similarly to `Taxonomy.prune()` but removes upstream nodes and reroots the tree. + +## 2.5.2 + **Improvements** * Update docstrings diff --git a/docs/api_doc/constructors.md b/docs/api_doc/factories.md similarity index 100% rename from docs/api_doc/constructors.md rename to docs/api_doc/factories.md diff --git a/mkdocs.yml b/mkdocs.yml index 1990978..71f6b78 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -15,7 +15,7 @@ nav: - Predictions vs. expectations: recipes/verify_blast.md - API reference: - Taxonomy: api_doc/taxonomy.md - - Constructors: api_doc/constructors.md + - Constructors: api_doc/factories.md - Nodes: api_doc/nodes.md - Lineage: api_doc/lineage.md - About: diff --git a/taxidTools/Taxonomy.py b/taxidTools/Taxonomy.py index f8abc4e..4b2fc02 100644 --- a/taxidTools/Taxonomy.py +++ b/taxidTools/Taxonomy.py @@ -144,93 +144,6 @@ def from_list(cls, node_list: list[_BaseNode]) -> Taxonomy: return cls(as_dict) - @classmethod - def from_taxdump(cls, nodes: str, rankedlineage: str) -> Taxonomy: - """ - Create a Taxonomy object from the NBI Taxdump files - - .. deprecated:: 2.4.0 - `Taxonomy.from_taxdump` will be removed in 3.0.0, it is replaced by - `read_taxdump`, a module level constructor. - - Load the taxonomic infromation form the nodes.dmp and - rankedlineage.dmp files available from the NCBI servers. 
- - Parameters - ---------- - nodes: - Path to the nodes.dmp file - rankedlineage: - Path to the rankedlineage.dmp file - - Examples - -------- - >>> tax = Taxonomy.from_taxdump("nodes.dmp', 'rankedlineage.dmp') - """ - _deprecation('Taxonomy.from_taxdump()', 'read_taxdump()') - - txd = {} - parent_dict = {} - - # Creating nodes - for line in _parse_dump(nodes): - txd[line[0]] = Node(taxid=line[0], rank=str(line[2])) - parent_dict[str(line[0])] = line[1] # storing parent id - - # Add names form rankedlineage - for line in _parse_dump(rankedlineage): - txd[line[0]].name = line[1] - - # Update parent info - for k, v in parent_dict.items(): - txd[k].parent = txd[v] - - return cls(txd) - - @classmethod - def from_json(cls, path: str) -> Taxonomy: - """ - Load a Taxonomy from a previously exported json file. - - .. deprecated:: 2.4.0 - `Taxonomy.from_json` will be removed in 3.0.0, it is replaced by - `read_json`, a module level constructor. - - Parameters - ---------- - path: - Path of file to load - - See Also - -------- - Taxonomy.write - """ - _deprecation('Taxonomy.from_json()', 'read_json()') - - # parse json - with open(path, 'r') as fi: - parser = json.loads(fi.read()) - - txd = {} - parent_dict = {} - - # Create nodes from records - for record in parser: - class_call = eval(record['type']) - txd[record['_taxid']] = class_call(taxid=record['_taxid'], - name=record['_name'], - rank=record['_rank']) - parent_dict[record['_taxid']] = record['_parent'] - - # Update parent info - for k, v in parent_dict.items(): - try: - txd[k].parent = txd[v] - except KeyError: - pass - - return cls(txd) - def copy(self) -> Taxonomy: """ Create a deepcopy of the current Taxonomy instance. 
@@ -725,6 +638,10 @@ def prune(self, taxid: Union[str, int], inplace: Optional[bool] = True) -> None: ------- None + See Also + -------- + Taxonomy.clip + Examples -------- >>> node0 = Node(taxid = 0, name = "root", @@ -748,7 +665,7 @@ def prune(self, taxid: Union[str, int], inplace: Optional[bool] = True) -> None: But other branches are gone >>> tax.get('2') - KeyError: '2' + None We can keep a copy of the: @@ -894,55 +811,108 @@ def write(self, path: str) -> None: with open(path, 'w') as fi: fi.write(writer) + def clip(self, taxid: Union[str, int], inplace: Optional[bool] = True) -> None: + """ + Clip the Taxonomy at the given taxid -def load(path: str) -> Taxonomy: - """ - Load a Taxonomy from a previously exported json file. + Nodes not in the lineage (upwards and downwards) + of the given taxid will be discarded. + The Ancestors of the given taxid will NOT be kept and the given + node will become the new root! - .. deprecated:: 2.4.0 - `load` will be removed in 3.0.0, it is replaced by - `read_json`, a module level constructor. 
+ Parameters + ---------- + taxid: str or int + taxid whose Lineage to keep + inplace: bool, optional + perform the operation in place and mutate the underlying objects, + or return a mutated copy of the instance and keep the original unchanged - Parameters - ---------- - path: - Path of file to load + Returns + ------- + None when inplace is True, otherwise the clipped copy of the Taxonomy - See Also - -------- - Taxonomy.write - load_ncbi - """ - _deprecation('load()', 'read_json()') - return Taxonomy.from_json(path) + See Also + -------- + Taxonomy.prune + Examples + -------- + >>> node0 = Node(taxid = 0, name = "root", + rank = "root", parent = None) + >>> node1 = Node(taxid = 1, name = "node1", + rank = "rank1", parent = node0) + >>> node2 = Node(taxid = 2, name = "node2", + rank = "rank1", parent = node0) + >>> node11 = Node(taxid = 11, name = "node11", + rank = "rank2", parent = node1) + >>> node12 = Node(taxid = 12, name = "node12", + rank = "rank2", parent = node1) + >>> tax = Taxonomy.from_list([node0, node1, node2, node11, node12]) + >>> tax.clip(1) -def load_ncbi(nodes: str, rankedlineage: str) -> Taxonomy: - """ - Load a Taxonomy from the NCBI`s taxdump files + Ancestry not kept - .. deprecated:: 2.4.0 - `load_ncbi` will be removed in 3.0.0, it is replaced by - `read_ncbi`, a better-named module level constructor. 
+ >>> tax.getAncestry(11) + Lineage([Node(11), Node(1)]) - Parameters - ---------- - nodes: - Path to the nodes.dmp file - rankedlineage: - Path to the rankedlineage.dmp file + Other branches are gone - Examples - -------- - >>> tax = load_ncbi("nodes.dmp', 'rankedlineage.dmp') + >>> tax.get('2') + None + """ + if inplace: + tax = self + else: + tax = self.copy() - See Also - -------- - Taxonomy.from_taxdump - load - """ - _deprecation('load_ncbi()', 'read_taxdump()') - return Taxonomy.from_taxdump(nodes, rankedlineage) + # Discarding side branches, then getting the node and its upstream nodes + tax.prune(taxid) + nodes = tax.getAncestry(taxid) + # Removing upstream nodes by taxid key (nodes[0] is the clipped node itself) + for i in range(1, len(nodes)): + tax.pop(nodes[i].taxid) + + # Rerooting on the Node object, not the raw taxid (NOTE(review): confirm root is assignable) + nodes[0].parent = None + tax.root = nodes[0] + + if not inplace: + return tax + + def toNewick(self, names: str = 'name') -> str: + """ + Generate a Newick string from the current taxonomy + + Export as Newick tree string for compatibility with other packages + Import in ETE with format 8 (all names). + Experimental feature + + Parameters + ---------- + names: str + Node attribute to use as node name, choice of 'name' or 'taxid' + + Returns + ------- + str + """ + + def subtree(node, names): + if names == 'name': + namestring = str(node.name.replace(" ", "_")) + elif names == 'taxid': + namestring = str(node.taxid) + + if not node.children: + return namestring + subtrees = [subtree(child, names) for child in node.children] + return f"({','.join(subtrees)}){namestring}" + + if names not in ['name', 'taxid']: + raise ValueError("Parameter 'names' must be either 'name' or 'taxid'") + + return f"{subtree(self.root, names)};" def _flatten(t: list) -> list: @@ -1015,13 +985,3 @@ def _insert_dummies(node, next_rank): dummy = DummyNode(rank=next_rank, parent=node) dummies.append(dummy) return dummies - - -# Class methods using this here are pending deprecation, remove form this module in 3.0.0 -def _parse_dump(filepath: str) -> Iterator: - """ - Dump file line iterator, returns a yields of fields - """ - with 
open(filepath, 'r') as dmp: - for line in dmp: - yield [item.strip() for item in line.split("|")] diff --git a/taxidTools/__init__.py b/taxidTools/__init__.py index dc49938..0e529be 100644 --- a/taxidTools/__init__.py +++ b/taxidTools/__init__.py @@ -1,5 +1,5 @@ from .Node import Node, DummyNode, MergedNode -from .Taxonomy import Taxonomy, load, load_ncbi +from .Taxonomy import Taxonomy from .Lineage import Lineage from .factories import read_json, read_taxdump from .utils import linne @@ -9,7 +9,7 @@ from .__version__ import __url__ __all__ = ['Node', 'DummyNode', 'MergedNode', - 'Taxonomy', 'load', 'load_ncbi', + 'Taxonomy', 'Lineage', 'read_json', 'read_taxdump', 'linne', diff --git a/taxidTools/__version__.py b/taxidTools/__version__.py index 0e0276f..ce88d35 100644 --- a/taxidTools/__version__.py +++ b/taxidTools/__version__.py @@ -1,4 +1,4 @@ -__version__ = "2.5.1" +__version__ = "2.5.2" __title__ = "taxidTools" __description__ = "A Python Toolkit for Taxonomy" __author__ = "Gregoire Denay" diff --git a/tests/test_Taxonomy.py b/tests/test_Taxonomy.py index 9e83f09..b5c283f 100644 --- a/tests/test_Taxonomy.py +++ b/tests/test_Taxonomy.py @@ -87,8 +87,7 @@ def test_ancestry_tests(self): def test_copy(self): self.new = self.txd.copy() self.txd.data = {} - with self.assertRaises(taxidTools.InvalidNodeError): - _ = self.txd[0] + self.assertRaises(taxidTools.InvalidNodeError, self.txd.__getitem__, 0) self.assertIsNotNone(self.new.get('0', None)) def test_InvalidNodeError(self): diff --git a/tests/test_complextree.py b/tests/test_complextree.py index fb38bdd..41afc37 100644 --- a/tests/test_complextree.py +++ b/tests/test_complextree.py @@ -2,6 +2,7 @@ import taxidTools from taxidTools.Taxonomy import _insert_nodes_recc, _insert_dummies + class TestComplexTree(unittest.TestCase): # Test Tree # @@ -152,3 +153,10 @@ def test_insert_nodes_recc(self): self.assertEqual(len(new_nodes), 12) self.assertRaises(ValueError, _insert_nodes_recc, self.node0, ['root']) + + def 
test_clip(self): + self.txd.clip(1) + self.assertEqual(self.txd.root, self.txd['1']) + self.assertIsNone(self.txd['1'].parent) + self.assertRaises(taxidTools.InvalidNodeError, self.txd.__getitem__, '0') + self.assertRaises(taxidTools.InvalidNodeError, self.txd.__getitem__, '2')