Skip to content

Commit

Permalink
Merge pull request #474 from fransua/master
Browse files Browse the repository at this point in the history
Small fixes
  • Loading branch information
jhcepas authored May 1, 2023
2 parents b5fbc27 + 4c09930 commit eb53950
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 75 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
build
*.pyc
*~
.vscode

#*#
doc/_build/
sdoc/_build/
Expand Down
131 changes: 64 additions & 67 deletions ete3/evol/evoltree.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,20 @@
instances.
"""
from __future__ import absolute_import
from ..tools.utils import which
from .utils import translate
from .model import Model, PARAMS, AVAIL
from ..parser.newick import write_newick
from .. import PhyloNode, SeqGroup
from warnings import warn
import sys
import os
from six.moves import map

__author__ = "Francois-Jose Serra"
__email__ = "francois@barrabin.org"
__licence__ = "GPLv3"
__version__ = "0.0"
__author__ = "Francois-Jose Serra"
__email__ = "francois@barrabin.org"
__licence__ = "GPLv3"
__version__ = "0.0"
__references__ = '''
Yang, Z., Nielsen, R., Goldman, N., & Pedersen, A. M. 2000.
Codon-substitution models for heterogeneous selection pressure at amino acid sites.
Expand All @@ -73,18 +81,10 @@
Retrieved from http://www.ncbi.nlm.nih.gov/pubmed/17483113
'''

import os
import sys
from warnings import warn

from .. import PhyloNode, SeqGroup
from ..parser.newick import write_newick
from .model import Model, PARAMS, AVAIL
from .utils import translate
from ..tools.utils import which
try:
from scipy.stats import chi2
chi_high = lambda x, y: 1 - chi2.cdf(x, y)
def chi_high(x, y): return 1 - chi2.cdf(x, y)
except ImportError:
from .utils import chi_high

Expand All @@ -97,12 +97,14 @@

__all__ = ["EvolNode", "EvolTree"]


def _parse_species(name):
'''
just to return specie name from fasta description
'''
return name[:3]


class EvolNode(PhyloNode):
""" Re-implementation of the standart TreeNode instance. It adds
attributes and methods to work with phylogentic trees.
Expand All @@ -118,7 +120,7 @@ class EvolNode(PhyloNode):

def __init__(self, newick=None, alignment=None, alg_format="fasta",
sp_naming_function=_parse_species, format=0,
binpath='', **kwargs):
binpath='', **kwargs):
'''
freebranch: path to find codeml output of freebranch model.
'''
Expand Down Expand Up @@ -167,11 +169,12 @@ def _label_as_paml(self):
nid = 1
# check we do not have dupplicated names in tree
if (len(self)) != len(set(self.get_leaf_names())):
duplis = [n for n in self.get_leaf_names() if self.get_leaf_names().count(n)>1]
duplis = [n for n in self.get_leaf_names(
) if self.get_leaf_names().count(n) > 1]
raise Exception('EvolTree require unique names for leaves', duplis)
# put ids
for leaf in sorted(self, key=lambda x: x.name):
leaf.add_feature ('node_id', nid)
leaf.add_feature('node_id', nid)
nid += 1
self.add_feature('node_id', nid)
self._label_internal_nodes([nid])
Expand All @@ -187,7 +190,7 @@ def get_descendant_by_node_id(self, idname):
if self.node_id == idname:
return self
except AttributeError:
warn('Should be first labelled as paml ' + \
warn('Should be first labelled as paml ' +
'(automatically done when alignemnt is loaded)')

def _write_algn(self, fullpath):
Expand All @@ -196,12 +199,11 @@ def _write_algn(self, fullpath):
"""
seq_group = SeqGroup()
for n in self:
seq_group.id2seq [n.node_id] = n.nt_sequence
seq_group.id2name [n.node_id] = n.name
seq_group.name2id [n.name ] = n.node_id
seq_group.id2seq[n.node_id] = n.nt_sequence
seq_group.id2name[n.node_id] = n.name
seq_group.name2id[n.name] = n.node_id
seq_group.write(outfile=fullpath, format='paml')


def run_model(self, model_name, ctrl_string='', keep=True, **kwargs):
'''
To compute evolutionnary models. e.g.: b_free_lala.vs.lele, will launch one free branch model, and store
Expand All @@ -226,21 +228,22 @@ def run_model(self, model_name, ctrl_string='', keep=True, **kwargs):
* model-name is compulsory, is the name of the model (see table above for the full list)
* the second part is accessory, it is to avoid over-writing models with the same name.
:argument ctrl_string: list of parameters that can be used as control file.
:argument True keep: links the model to the tree (equivalen of running `EVOL_TREE.link_to_evol_model(MODEL_NAME)`)
:argument kwargs: extra parameters should be one of: %s.
'''
from subprocess import Popen, PIPE
model_obj = Model(model_name, self, **kwargs)
fullpath = os.path.join (self.workdir, model_obj.name)
os.system("mkdir -p %s" %fullpath)
fullpath = os.path.join(self.workdir, model_obj.name)
os.system("mkdir -p %s" % fullpath)
# write tree file
self._write_algn(fullpath + '/algn')
if model_obj.properties['exec'] == 'Slr':
self.write(outfile=fullpath+'/tree', format = (11))
self.write(outfile=fullpath+'/tree', format=(11))
else:
self.write(outfile=fullpath+'/tree',
format = (10 if model_obj.properties['allow_mark'] else 9))
format=(10 if model_obj.properties['allow_mark'] else 9))
# write algn file
## MODEL MODEL MDE
# MODEL MODEL MDE
if ctrl_string == '':
ctrl_string = model_obj.get_ctrl_string(fullpath+'/tmp.ctl')
else:
Expand All @@ -249,34 +252,31 @@ def run_model(self, model_name, ctrl_string='', keep=True, **kwargs):
os.chdir(fullpath)
bin_ = os.path.join(self.execpath, model_obj.properties['exec'])
try:
proc = Popen([bin_, 'tmp.ctl'], stdout=PIPE, stdin=PIPE)
proc = Popen([bin_, 'tmp.ctl'], stdout=PIPE,
stdin=PIPE, stderr=PIPE)
except OSError:
raise Exception(('ERROR: {} not installed, ' +
'or wrong path to binary\n').format(bin_))
run, err = proc.communicate(b'\n') # send \n via stdin in case codeml/slr asks something (note on py3, stdin needs bytes)
# send \n via stdin in case codeml/slr asks something (note on py3, stdin needs bytes)
run, err = proc.communicate(b'\n')
run = run.decode(sys.stdout.encoding)

if err is not None:
warn("ERROR: codeml not found!!!\n" +
" define your variable EvolTree.execpath")
return 1
if 'error' in run or 'Error' in run:
warn("ERROR: inside codeml!!\n" + run)
return 1
os.chdir(hlddir)
if err:
warn("ERROR: inside codeml!!\n" + err)
return 1
if keep:
setattr(model_obj, 'run', run)
self.link_to_evol_model(os.path.join(fullpath,'out'), model_obj)
self.link_to_evol_model(os.path.join(fullpath, 'out'), model_obj)
sep = '\n'
run_model.__doc__ = run_model.__doc__ % \
(sep.join([' %-8s %-27s %-15s ' % \
('%s' % (x), AVAIL[x]['evol'], AVAIL[x]['typ']) for x in sorted (sorted (AVAIL.keys()), key=lambda x: \
AVAIL[x]['typ'],
reverse=True)]),
', '.join(list(PARAMS.keys())))

(sep.join([' %-8s %-27s %-15s ' %
('%s' % (x), AVAIL[x]['evol'], AVAIL[x]['typ']) for x in sorted(sorted(AVAIL.keys()), key=lambda x:
AVAIL[x]['typ'],
reverse=True)]),
', '.join(list(PARAMS.keys())))

#def test_codon_model(self):
# def test_codon_model(self):
# for c_frq in range(4):
# self.run_model('M0.model_test-'+str(c_frq), CodonFreq=c_frq)
# if self.get_most_likely('M0.model_test-1', 'M0.model_test-0') > 0.05:
Expand All @@ -288,7 +288,7 @@ def run_model(self, model_name, ctrl_string='', keep=True, **kwargs):
# self.get_most_likely('M0.model_test-3', 'M0.model_test-2')

def link_to_alignment(self, alignment, alg_format="paml",
nucleotides=True, **kwargs):
nucleotides=True, **kwargs):
'''
same function as for phyloTree, but translate sequences if nucleotides
nucleotidic sequence is kept under node.nt_sequence
Expand Down Expand Up @@ -332,11 +332,11 @@ def show(self, layout=None, tree_style=None, histfaces=None):
except AttributeError:
warn('model %s not computed' % (hist))
if not 'histface' in mdl.properties:
if len(histfaces)>1 and histfaces.index(hist)!=0:
if len(histfaces) > 1 and histfaces.index(hist) != 0:
mdl.set_histface(up=False)
else:
mdl.set_histface()
if mdl.properties ['histface'].up:
if mdl.properties['histface'].up:
ts.aligned_header.add_face(
mdl.properties['histface'], 1)
else:
Expand All @@ -346,7 +346,6 @@ def show(self, layout=None, tree_style=None, histfaces=None):
else:
raise ValueError("Treeview module is disabled")


def render(self, file_name, layout=None, w=None, h=None,
tree_style=None, header=None, histfaces=None):
'''
Expand All @@ -369,11 +368,11 @@ def render(self, file_name, layout=None, w=None, h=None,
except AttributeError:
warn('model %s not computed' % (hist))
if not 'histface' in mdl.properties:
if len(histfaces)>1 and histfaces.index(hist)!=0:
if len(histfaces) > 1 and histfaces.index(hist) != 0:
mdl.set_histface(up=False)
else:
mdl.set_histface()
if mdl.properties ['histface'].up:
if mdl.properties['histface'].up:
ts.aligned_header.add_face(
mdl.properties['histface'], 1)
else:
Expand All @@ -385,7 +384,6 @@ def render(self, file_name, layout=None, w=None, h=None,
else:
raise ValueError("Treeview module is disabled")


def mark_tree(self, node_ids, verbose=False, **kargs):
'''
function to mark branches on tree in order that paml could interpret it.
Expand All @@ -401,11 +399,11 @@ def mark_tree(self, node_ids, verbose=False, **kargs):
'''
from re import match
node_ids = list(map(int , node_ids))
node_ids = list(map(int, node_ids))
if 'marks' in kargs:
marks = list(kargs['marks'])
else:
marks = ['#1']*len (node_ids)
marks = ['#1']*len(node_ids)
for node in self.traverse():
if not hasattr(node, 'node_id'):
continue
Expand All @@ -415,7 +413,8 @@ def mark_tree(self, node_ids, verbose=False, **kargs):
marks[node_ids.index(node.node_id)]) is None) and verbose:
warn('WARNING: marks should be "#" sign directly ' +
'followed by integer\n' + self.mark_tree.__doc__)
node.add_feature('mark', ' '+marks[node_ids.index(node.node_id)])
node.add_feature(
'mark', ' '+marks[node_ids.index(node.node_id)])
elif not 'mark' in node.features:
node.add_feature('mark', '')

Expand All @@ -430,7 +429,7 @@ def link_to_evol_model(self, path, model):
:argument model: either the name of a model, or a Model object (usually empty)
'''
if type(model) == str :
if isinstance(model, str):
model = Model(model, self, path)
else:
model._load(path)
Expand All @@ -443,7 +442,7 @@ def link_to_evol_model(self, path, model):
if not os.path.isfile(path):
warn("ERROR: not a file: " + path)
return 1
if len(self._models) == 1 and model.properties['exec']=='codeml':
if len(self._models) == 1 and model.properties['exec'] == 'codeml':
self.change_dist_to_evol('bL', model, fill=True)

def get_evol_model(self, modelname):
Expand All @@ -456,24 +455,23 @@ def get_evol_model(self, modelname):
try:
return self._models[modelname]
except KeyError:
warn("Model %s not found." % (modelname))

Exception("ERROR: Model %s not found." % (modelname))

def write(self, features=None, outfile=None, format=10):
"""
Inherits from Tree but adds the tenth format, that allows to display marks for CodeML.
TODO: internal writting format need to be something like 0
"""
from re import sub
if int(format)==11:
if int(format) == 11:
nwk = ' %s 1\n' % (len(self))
nwk += sub('\[&&NHX:mark=([ #0-9.]*)\]', r'\1', \
write_newick(self, features=['mark'],format=9))
elif int(format)==10:
nwk = sub('\[&&NHX:mark=([ #0-9.]*)\]', r'\1', \
write_newick(self, features=['mark'],format=9))
nwk += sub('\[&&NHX:mark=([ #0-9.]*)\]', r'\1',
write_newick(self, features=['mark'], format=9))
elif int(format) == 10:
nwk = sub('\[&&NHX:mark=([ #0-9.]*)\]', r'\1',
write_newick(self, features=['mark'], format=9))
else:
nwk = write_newick(self, features=features,format=format)
nwk = write_newick(self, features=features, format=format)
if outfile is not None:
open(outfile, "w").write(nwk)
return nwk
Expand All @@ -482,7 +480,6 @@ def write(self, features=None, outfile=None, format=10):
write.__doc__ += super(PhyloNode, PhyloNode()).write.__doc__.replace(
'argument format', 'argument 10 format')


def get_most_likely(self, altn, null):
'''
Returns pvalue of LRT between alternative model and null model.
Expand Down Expand Up @@ -530,7 +527,7 @@ def get_most_likely(self, altn, null):
return 1.0
try:
if hasattr(altn, 'lnL') and hasattr(null, 'lnL'):
if null.lnL - altn.lnL < 0:
if null.lnL - altn.lnL < 0:
return chi_high(2 * abs(altn.lnL - null.lnL),
float(altn.np - null.np))
else:
Expand Down Expand Up @@ -563,7 +560,7 @@ def change_dist_to_evol(self, evol, model, fill=False):
node.dist = model.branches[node.node_id][evol]
if fill:
for e in ['dN', 'dS', 'w', 'bL']:
node.add_feature(e, model.branches [node.node_id][e])
node.add_feature(e, model.branches[node.node_id][e])


# cosmetic alias
Expand Down
5 changes: 2 additions & 3 deletions ete3/evol/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,8 +217,7 @@ def set_histface(self, up=True, hlines=(1.0, 0.3), kind='bar',
if not 'ylim' in kwargs:
kwargs['ylim'] = (0, 2)
if errors:
errors = self.sites[val]['se'] if 'se' in self.sites[val]\
else None
errors = self.sites[val].get('se', None)
if TREEVIEW:
try:
hist = SequencePlotFace(self.sites[val]['w'], hlines=hlines,
Expand Down Expand Up @@ -336,7 +335,7 @@ def significance_by_site(self, val):
self.sites[val]['class']):
if pval < 0.95:
categories.append('NS')
elif curr_class != self.n_classes[val] and not ps_model:
elif curr_class == self.n_classes[val] and not ps_model:
if pval < 0.99:
categories.append('RX')
else:
Expand Down
10 changes: 5 additions & 5 deletions ete3/evol/parser/codemlparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,16 +107,16 @@ def parse_rst(path):
sites[typ].setdefault ('aa', []).append (line[1])
# get site class probability
probs = []
for i in range (k):
for i in range(k):
probs.append (float (line[2+i]))
sites [typ].setdefault ('p'+str(i), []).append (float (line[2+i]))
sites [typ].setdefault ('pv', []).append (max (probs))
sites [typ].setdefault('p' + str(i), []).append(float(line[2+i]))
sites [typ].setdefault ('pv', []).append (max(probs))
# get most likely site class
classe = int (line [3 + i])
sites[typ].setdefault ('class', []).append (classe)
sites[typ].setdefault ('class', []).append(classe)
# if there, get omega and error
try:
sites [typ].setdefault ('w' , []).append (float (line [4 + i]))
sites [typ].setdefault('w' , []).append(float (line [4 + i]))
except IndexError:
# in this case we are with branch-site A or A1 and we should sum
# probabilities of categories 2a and 2b
Expand Down

0 comments on commit eb53950

Please sign in to comment.