Skip to content

Commit

Permalink
Merge branch 'trees' into HEAD
Browse files Browse the repository at this point in the history
  • Loading branch information
gregdenay committed Jun 14, 2024
2 parents c43783e + 40eb539 commit 5c2adc0
Show file tree
Hide file tree
Showing 8 changed files with 121 additions and 143 deletions.
11 changes: 11 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,17 @@

## dev

**Deprecation**

* `Taxonomy.from_json`, `load`, `Taxonomy.from_taxdump` and `load_ncbi` are deprecated. See v2.4.0 notes for replacements

**New features**

* It is now possible to export a Taxonomy as a Newick tree string with `Taxonomy.toNewick()`
* It is now possible to clip Taxonomies with `Taxonomy.clip()`, which behaves similarly to `Taxonomy.prune()` but also removes the upstream nodes and reroots the tree at the given node.

## 2.5.2

**Improvements**

* Update docstrings
Expand Down
File renamed without changes.
2 changes: 1 addition & 1 deletion mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ nav:
- Predictions vs. expectations: recipes/verify_blast.md
- API reference:
- Taxonomy: api_doc/taxonomy.md
- Constructors: api_doc/constructors.md
- Constructors: api_doc/factories.md
- Nodes: api_doc/nodes.md
- Lineage: api_doc/lineage.md
- About:
Expand Down
234 changes: 97 additions & 137 deletions taxidTools/Taxonomy.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,93 +144,6 @@ def from_list(cls, node_list: list[_BaseNode]) -> Taxonomy:

return cls(as_dict)

@classmethod
def from_taxdump(cls, nodes: str, rankedlineage: str) -> Taxonomy:
    """
    Build a Taxonomy object from the NCBI taxdump files.

    .. deprecated:: 2.4.0
        `Taxonomy.from_taxdump` will be removed in 3.0.0, it is replaced by
        `read_taxdump`, a module level constructor.

    Taxids, ranks and parent links are read from nodes.dmp,
    node names are read from rankedlineage.dmp.

    Parameters
    ----------
    nodes:
        Path to the nodes.dmp file
    rankedlineage:
        Path to the rankedlineage.dmp file

    Examples
    --------
    >>> tax = Taxonomy.from_taxdump('nodes.dmp', 'rankedlineage.dmp')
    """
    _deprecation('Taxonomy.from_taxdump()', 'read_taxdump()')

    by_taxid = {}
    parent_of = {}

    # First pass: one Node per record, parent ids are kept aside because
    # the parent Node may not have been created yet
    for fields in _parse_dump(nodes):
        by_taxid[fields[0]] = Node(taxid=fields[0], rank=str(fields[2]))
        parent_of[str(fields[0])] = fields[1]

    # Second pass: attach the names found in rankedlineage
    for fields in _parse_dump(rankedlineage):
        by_taxid[fields[0]].name = fields[1]

    # Third pass: every Node exists now, link children to parents
    for child_id, parent_id in parent_of.items():
        by_taxid[child_id].parent = by_taxid[parent_id]

    return cls(by_taxid)

@classmethod
def from_json(cls, path: str) -> Taxonomy:
    """
    Load a Taxonomy from a previously exported json file.

    .. deprecated:: 2.4.0
        `Taxonomy.from_json` will be removed in 3.0.0, it is replaced by
        `read_json`, a module level constructor.

    Each record of the file must provide the keys 'type', '_taxid',
    '_name', '_rank' and '_parent'.

    Parameters
    ----------
    path:
        Path of file to load

    See Also
    --------
    Taxonomy.write
    """
    _deprecation('Taxonomy.from_json()', 'read_json()')

    # parse json
    with open(path, 'r') as fi:
        records = json.load(fi)

    txd = {}
    parent_dict = {}

    # Create nodes from records
    for record in records:
        # SECURITY NOTE(review): eval() runs whatever expression is stored
        # in the 'type' field of the file. Only load files from a trusted
        # source; a lookup restricted to the known node classes would be
        # safer but needs to be checked against the names available here.
        class_call = eval(record['type'])
        txd[record['_taxid']] = class_call(taxid=record['_taxid'],
                                           name=record['_name'],
                                           rank=record['_rank'])
        parent_dict[record['_taxid']] = record['_parent']

    # Update parent info
    for k, v in parent_dict.items():
        try:
            txd[k].parent = txd[v]
        except KeyError:
            # Parent id is not among the loaded records: the node is left
            # without a parent (presumably the root - TODO confirm)
            pass

    return cls(txd)

def copy(self) -> Taxonomy:
"""
Create a deepcopy of the current Taxonomy instance.
Expand Down Expand Up @@ -725,6 +638,10 @@ def prune(self, taxid: Union[str, int], inplace: Optional[bool] = True) -> None:
-------
None
See Also
--------
Taxonomy.clip
Examples
--------
>>> node0 = Node(taxid = 0, name = "root",
Expand All @@ -748,7 +665,7 @@ def prune(self, taxid: Union[str, int], inplace: Optional[bool] = True) -> None:
But other branches are gone
>>> tax.get('2')
KeyError: '2'
None
We can keep a copy of the:
Expand Down Expand Up @@ -894,55 +811,108 @@ def write(self, path: str) -> None:
with open(path, 'w') as fi:
fi.write(writer)

def clip(self, taxid: Union[str, int], inplace: Optional[bool] = True) -> None:
    """
    Clip the Taxonomy at the given taxid

    Nodes not in the lineage (upwards and downwards)
    of the given taxid will be discarded.
    The Ancestors of the given taxid will NOT be kept and the given
    node will become the new root!

    Parameters
    ----------
    taxid: str or int
        taxid whose Lineage to keep
    inplace: bool, optional
        perform the operation inplace and mutate the underlying objects
        or return a mutated copy of the instance, keep the original unchanged

    Returns
    -------
    None or Taxonomy
        None when `inplace` is True, the clipped copy otherwise

    See Also
    --------
    Taxonomy.prune

    Examples
    --------
    >>> node0 = Node(taxid = 0, name = "root",
                     rank = "root", parent = None)
    >>> node1 = Node(taxid = 1, name = "node1",
                     rank = "rank1", parent = node0)
    >>> node2 = Node(taxid = 2, name = "node2",
                     rank = "rank1", parent = node0)
    >>> node11 = Node(taxid = 11, name = "node11",
                     rank = "rank2", parent = node1)
    >>> node12 = Node(taxid = 12, name = "node12",
                     rank = "rank2", parent = node1)
    >>> tax = Taxonomy.from_list([node0, node1, node2, node11, node12])
    >>> tax.clip(1)

    Ancestry above the clipped node is not kept

    >>> tax.getAncestry(11)
    Lineage([Node(11), Node(1)])

    Other branches are gone

    >>> tax.get('2')
    None
    """
    if inplace:
        tax = self
    else:
        tax = self.copy()

    # Getting upstream nodes: index 0 is the requested node itself,
    # the following entries are its ancestors
    nodes = tax.getAncestry(taxid)
    new_root = nodes[0]

    # Discarding the branches that do not go through the requested node.
    # NOTE(review): relies on prune() working in place with its default
    # inplace=True - confirm
    tax.prune(taxid)

    # Removing upstream nodes, the mapping is keyed by taxid, not by Node
    for i in range(1, len(nodes)):
        tax.pop(nodes[i].taxid)

    # Rerooting taxonomy: `taxid` is a plain str/int, the parent link and
    # the root must be set using the Node object
    new_root.parent = None
    tax.root = new_root

    if not inplace:
        return tax


def load(path: str) -> Taxonomy:
    """
    Load a Taxonomy from a previously exported json file.

    .. deprecated:: 2.4.0
        `load` will be removed in 3.0.0, it is replaced by
        `read_json`, a module level constructor.

    Parameters
    ----------
    path:
        Path of file to load

    See Also
    --------
    Taxonomy.write
    load_ncbi
    """
    _deprecation('load()', 'read_json()')
    return Taxonomy.from_json(path)


def load_ncbi(nodes: str, rankedlineage: str) -> Taxonomy:
    """
    Load a Taxonomy from the NCBI's taxdump files

    .. deprecated:: 2.4.0
        `load_ncbi` will be removed in 3.0.0, it is replaced by
        `read_taxdump`, a better-named module level constructor.

    Parameters
    ----------
    nodes:
        Path to the nodes.dmp file
    rankedlineage:
        Path to the rankedlineage.dmp file

    Examples
    --------
    >>> tax = load_ncbi('nodes.dmp', 'rankedlineage.dmp')

    See Also
    --------
    Taxonomy.from_taxdump
    load
    """
    _deprecation('load_ncbi()', 'read_taxdump()')
    return Taxonomy.from_taxdump(nodes, rankedlineage)

def toNewick(self, names: str = 'name') -> str:
    """
    Generate a Newick string from the current taxonomy

    Export as Newick tree string for compatibility with other packages.
    Import in ETE with format 8 (all names).
    Experimental feature

    Parameters
    ----------
    names: str
        Node attribute to use as node name, choice of 'name' or 'taxid'

    Returns
    -------
    str
        Newick string of the tree starting at the root,
        terminated by a semicolon

    Raises
    ------
    ValueError
        If `names` is neither 'name' nor 'taxid'
    """
    if names not in ['name', 'taxid']:
        raise ValueError("Parameter 'names' must be either 'name' or 'taxid'")

    def subtree(node):
        # `names` is read from the enclosing scope so that the recursive
        # calls cannot lose it
        if names == 'name':
            # Blanks are replaced as unquoted Newick labels cannot hold them
            namestring = str(node.name.replace(" ", "_"))
        else:
            namestring = str(node.taxid)

        if not node.children:
            return namestring
        subtrees = [subtree(child) for child in node.children]
        return f"({','.join(subtrees)}){namestring}"

    return f"{subtree(self.root)};"


def _flatten(t: list) -> list:
Expand Down Expand Up @@ -1015,13 +985,3 @@ def _insert_dummies(node, next_rank):
dummy = DummyNode(rank=next_rank, parent=node)
dummies.append(dummy)
return dummies


# The class methods using this helper are pending deprecation; remove it from this module in 3.0.0
def _parse_dump(filepath: str) -> Iterator:
    """
    Iterate over the lines of a dump file.

    Each line is split on the "|" separator and yielded as a list of
    fields stripped of their surrounding whitespace.
    """
    with open(filepath, 'r') as handle:
        for record in handle:
            fields = record.split("|")
            yield list(map(str.strip, fields))
4 changes: 2 additions & 2 deletions taxidTools/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from .Node import Node, DummyNode, MergedNode
from .Taxonomy import Taxonomy, load, load_ncbi
from .Taxonomy import Taxonomy
from .Lineage import Lineage
from .factories import read_json, read_taxdump
from .utils import linne
Expand All @@ -9,7 +9,7 @@
from .__version__ import __url__

__all__ = ['Node', 'DummyNode', 'MergedNode',
'Taxonomy', 'load', 'load_ncbi',
'Taxonomy',
'Lineage',
'read_json', 'read_taxdump',
'linne',
Expand Down
2 changes: 1 addition & 1 deletion taxidTools/__version__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "2.5.1"
__version__ = "2.5.2"
__title__ = "taxidTools"
__description__ = "A Python Toolkit for Taxonomy"
__author__ = "Gregoire Denay"
Expand Down
3 changes: 1 addition & 2 deletions tests/test_Taxonomy.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,7 @@ def test_ancestry_tests(self):
def test_copy(self):
    """A copy must keep its nodes after the data of the original is emptied."""
    self.new = self.txd.copy()
    self.txd.data = {}
    # The lookup has to run inside the assertRaises context: passing
    # `self.txd[0]` as an argument would evaluate it first and the
    # exception would escape before assertRaises could catch it.
    with self.assertRaises(taxidTools.InvalidNodeError):
        _ = self.txd[0]
    self.assertIsNotNone(self.new.get('0', None))

def test_InvalidNodeError(self):
Expand Down
8 changes: 8 additions & 0 deletions tests/test_complextree.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import taxidTools
from taxidTools.Taxonomy import _insert_nodes_recc, _insert_dummies


class TestComplexTree(unittest.TestCase):
# Test Tree
#
Expand Down Expand Up @@ -152,3 +153,10 @@ def test_insert_nodes_recc(self):
self.assertEqual(len(new_nodes), 12)

self.assertRaises(ValueError, _insert_nodes_recc, self.node0, ['root'])

def test_clip(self):
    """Clipping at node 1 makes it the parentless root; nodes '0' and '2' are gone."""
    # clip() is called in place here, so there is no result worth keeping
    self.txd.clip(1)
    self.assertEqual(self.txd.root, self.txd['1'])
    self.assertIsNone(self.txd['1'].parent)
    self.assertRaises(taxidTools.InvalidNodeError, self.txd.__getitem__, '0')
    self.assertRaises(taxidTools.InvalidNodeError, self.txd.__getitem__, '2')

0 comments on commit 5c2adc0

Please sign in to comment.