diff --git a/cobra/core/__init__.py b/cobra/core/__init__.py index 25d679371..affa4369f 100644 --- a/cobra/core/__init__.py +++ b/cobra/core/__init__.py @@ -9,5 +9,6 @@ from cobra.core.model import Model from cobra.core.object import Object from cobra.core.reaction import Reaction -from cobra.core.solution import LegacySolution, Solution, get_solution +from cobra.core.group import Group +from cobra.core.solution import Solution, LegacySolution, get_solution from cobra.core.species import Species diff --git a/cobra/core/gene.py b/cobra/core/gene.py index c5d0f9a7c..938780d67 100644 --- a/cobra/core/gene.py +++ b/cobra/core/gene.py @@ -249,6 +249,11 @@ def remove_from_model(self, model=None, the_gene_re = re.compile('(^|(?<=( |\()))%s(?=( |\)|$))' % re.escape(self.id)) + # remove reference to the gene in all groups + associated_groups = self._model.get_associated_groups(self) + for group in associated_groups: + group.remove_members(self) + self._model.genes.remove(self) self._model = None diff --git a/cobra/core/group.py b/cobra/core/group.py new file mode 100644 index 000000000..914e151b8 --- /dev/null +++ b/cobra/core/group.py @@ -0,0 +1,109 @@ +# -*- coding: utf-8 -*- + +"""Define the group class.""" + +from __future__ import absolute_import + +from warnings import warn + +from six import string_types + +from cobra.core.object import Object + + +class Group(Object): + """ + Manage groups via this implementation of the SBML group specification. + + `Group` is a class for holding information regarding a pathways, + subsystems, or other custom groupings of objects within a cobra.Model + object. + + Parameters + ---------- + id : str + The identifier to associate with this group + name : str, optional + A human readable name for the group + members : iterable, optional + A list object containing references to cobra.Model-associated objects + that belong to the group. + kind : {"collection", "classification", "partonomy"}, optional + The kind of group, as specified for the Groups feature in the SBML + level 3 package specification. Can be any of "classification", + "partonomy", or "collection". The default is "collection". + Please consult the SBML level 3 package specification to ensure you + are using the proper value for kind. In short, members of a + "classification" group should have an "is-a" relationship to the group + (e.g. member is-a polar compound, or member is-a transporter). + Members of a "partonomy" group should have a "part-of" relationship + (e.g. member is part-of glycolysis). Members of a "collection" group + do not have an implied relationship between the members, so use this + value for kind when in doubt (e.g. member is a gap-filled reaction, + or member is involved in a disease phenotype). + """ + KIND_TYPES = ("collection", "classification", "partonomy") + + def __init__(self, id, name='', members=None, kind=None): + Object.__init__(self, id, name) + + self._members = set() if members is None else set(members) + self._kind = None + self.kind = "collection" if kind is None else kind + # self.model is None or refers to the cobra.Model that + # contains self + self._model = None + + # read-only + @property + def members(self): + return self._members + + @property + def kind(self): + return self._kind + + @kind.setter + def kind(self, kind): + kind = kind.lower() + if kind in self.KIND_TYPES: + self._kind = kind + else: + raise ValueError( + "Kind can only by one of: {}.".format(", ".join( + self.KIND_TYPES))) + + def add_members(self, new_members): + """ + Add objects to the group. + + Parameters + ---------- + new_members : list + A list of cobrapy objects to add to the group. + + """ + + if isinstance(new_members, string_types) or \ + hasattr(new_members, "id"): + warn("need to pass in a list") + new_members = [new_members] + + self._members.update(new_members) + + def remove_members(self, to_remove): + """ + Remove objects from the group. + + Parameters + ---------- + to_remove : list + A list of cobra objects to remove from the group + """ + + if isinstance(to_remove, string_types) or \ + hasattr(to_remove, "id"): + warn("need to pass in a list") + to_remove = [to_remove] + + self._members.difference_update(to_remove) diff --git a/cobra/core/metabolite.py b/cobra/core/metabolite.py index ae6753db3..717f2d7f4 100644 --- a/cobra/core/metabolite.py +++ b/cobra/core/metabolite.py @@ -79,7 +79,7 @@ def elements(self): # necessary for some old pickles which use the deprecated # Formula class tmp_formula = str(self.formula) - # commonly occuring characters in incorrectly constructed formulas + # commonly occurring characters in incorrectly constructed formulas if "*" in tmp_formula: warn("invalid character '*' found in formula '%s'" % self.formula) tmp_formula = tmp_formula.replace("*", "") diff --git a/cobra/core/model.py b/cobra/core/model.py index 4e6ea1a40..0ffb5918f 100644 --- a/cobra/core/model.py +++ b/cobra/core/model.py @@ -15,6 +15,9 @@ from cobra.core.configuration import Configuration from cobra.core.dictlist import DictList +from cobra.core.gene import Gene +from cobra.core.group import Group +from cobra.core.metabolite import Metabolite from cobra.core.object import Object from cobra.core.reaction import Reaction from cobra.core.solution import get_solution @@ -55,12 +58,16 @@ class Model(Object): genes : DictList A DictList where the key is the gene identifier and the value a Gene + groups : DictList + A DictList where the key is the group identifier and the value a + Group solution : Solution The last obtained solution from optimizing the model. """ def __setstate__(self, state): - """Make sure all cobra.Objects in the model point to the model.""" + """Make sure all cobra.Objects in the model point to the model. + """ self.__dict__.update(state) for y in ['reactions', 'genes', 'metabolites']: for x in getattr(self, y): @@ -93,6 +100,7 @@ def __init__(self, id_or_model=None, name=None): self.genes = DictList() self.reactions = DictList() # A list of cobra.Reactions self.metabolites = DictList() # A list of cobra.Metabolites + self.groups = DictList() # A list of cobra.Groups # genes based on their ids {Gene.id: Gene} self._compartments = {} self._contexts = [] @@ -281,7 +289,7 @@ def copy(self): """ new = self.__class__() do_not_copy_by_ref = {"metabolites", "reactions", "genes", "notes", - "annotation"} + "annotation", "groups"} for attr in self.__dict__: if attr not in do_not_copy_by_ref: new.__dict__[attr] = self.__dict__[attr] @@ -327,6 +335,39 @@ def copy(self): new_gene = new.genes.get_by_id(gene.id) new_reaction._genes.add(new_gene) new_gene._reaction.add(new_reaction) + + new.groups = DictList() + do_not_copy_by_ref = {"_model", "_members"} + # Groups can be members of other groups. We initialize them first and + # then update their members. + for group in self.groups: + new_group = group.__class__() + for attr, value in iteritems(group.__dict__): + if attr not in do_not_copy_by_ref: + new_group.__dict__[attr] = copy(value) + new_group._model = new + new.groups.append(new_group) + for group in self.groups: + new_group = new.groups[group.id] + # update awareness, as in the reaction copies + new_objects = [] + for member in group.members: + if isinstance(member, Metabolite): + new_object = new.metabolites.get_by_id(member.id) + elif isinstance(member, Reaction): + new_object = new.reactions.get_by_id(member.id) + elif isinstance(member, Gene): + new_object = new.genes.get_by_id(member.id) + elif isinstance(member, Group): + new_object = new.genes.get_by_id(member.id) + else: + raise TypeError( + "The group member {!r} is unexpectedly not a " + "metabolite, reaction, gene, nor another " + "group.".format(member)) + new_objects.append(new_object) + new_group.add_members(new_objects) + try: new._solver = deepcopy(self.solver) # Cplex has an issue with deep copies @@ -409,6 +450,11 @@ def remove_metabolites(self, metabolite_list, destructive=False): for x in metabolite_list: x._model = None + # remove reference to the metabolite in all groups + associated_groups = self.get_associated_groups(x) + for group in associated_groups: + group.remove_members(x) + if not destructive: for the_reaction in list(x._reaction): the_coefficient = the_reaction._metabolites[x] @@ -505,6 +551,7 @@ def add_boundary(self, metabolite, type="exchange", reaction_id=None, (0, 1000.0) >>> demand.build_reaction_string() 'atp_c --> ' + """ ub = CONFIGURATION.upper_bound if ub is None else ub lb = CONFIGURATION.lower_bound if lb is None else lb @@ -684,6 +731,100 @@ def remove_reactions(self, reactions, remove_orphans=False): if context: context(partial(self.genes.add, gene)) + # remove reference to the reaction in all groups + associated_groups = self.get_associated_groups(reaction) + for group in associated_groups: + group.remove_members(reaction) + + def add_groups(self, group_list): + """Add groups to the model. + + Groups with identifiers identical to a group already in the model are + ignored. + + If any group contains members that are not in the model, these members + are added to the model as well. Only metabolites, reactions, and genes + can have groups. + + Parameters + ---------- + group_list : list + A list of `cobra.Group` objects to add to the model. + """ + + def existing_filter(group): + if group.id in self.groups: + LOGGER.warning( + "Ignoring group '%s' since it already exists.", group.id) + return False + return True + + if isinstance(group_list, string_types) or \ + hasattr(group_list, "id"): + warn("need to pass in a list") + group_list = [group_list] + + pruned = DictList(filter(existing_filter, group_list)) + + for group in pruned: + group._model = self + for member in group.members: + # If the member is not associated with the model, add it + if isinstance(member, Metabolite): + if member not in self.metabolites: + self.add_metabolites([member]) + if isinstance(member, Reaction): + if member not in self.reactions: + self.add_reactions([member]) + # TODO(midnighter): `add_genes` method does not exist. + # if isinstance(member, Gene): + # if member not in self.genes: + # self.add_genes([member]) + + self.groups += [group] + + def remove_groups(self, group_list): + """Remove groups from the model. + + Members of each group are not removed + from the model (i.e. metabolites, reactions, and genes in the group + stay in the model after any groups containing them are removed). + + Parameters + ---------- + group_list : list + A list of `cobra.Group` objects to remove from the model. + """ + + if isinstance(group_list, string_types) or \ + hasattr(group_list, "id"): + warn("need to pass in a list") + group_list = [group_list] + + for group in group_list: + # make sure the group is in the model + if group.id not in self.groups: + LOGGER.warning("%r not in %r. Ignored.", group, self) + else: + self.groups.remove(group) + group._model = None + + def get_associated_groups(self, element): + """Returns a list of groups that an element (reaction, metabolite, gene) + is associated with. + + Parameters + ---------- + element: `cobra.Reaction`, `cobra.Metabolite`, or `cobra.Gene` + + Returns + ------- + list of `cobra.Group` + All groups that the provided object is a member of + """ + # check whether the element is associated with the model + return [g for g in self.groups if element in g.members] + def add_cons_vars(self, what, **kwargs): """Add constraints and variables to the model's mathematical problem. @@ -922,6 +1063,7 @@ def repair(self, rebuild_index=True, rebuild_relationships=True): self.reactions._generate_index() self.metabolites._generate_index() self.genes._generate_index() + self.groups._generate_index() if rebuild_relationships: for met in self.metabolites: met._reaction.clear() @@ -932,8 +1074,9 @@ def repair(self, rebuild_index=True, rebuild_relationships=True): met._reaction.add(rxn) for gene in rxn._genes: gene._reaction.add(rxn) + # point _model to self - for l in (self.reactions, self.genes, self.metabolites): + for l in (self.reactions, self.genes, self.metabolites, self.groups): for e in l: e._model = self @@ -1108,6 +1251,9 @@ def _repr_html_(self):
key: value
+is read into the Object.notes dictionary when reading SBML files. +On writing the Object.notes dictionary is serialized to the SBML +notes information. + +Annotations are read in the Object.annotation fields. + +Some SBML related issues are still open, please refer to the respective issue: +- update annotation format and support qualifiers (depends on decision + for new annotation format; https://github.com/opencobra/cobrapy/issues/684) +- write compartment annotations and notes (depends on updated first-class + compartments; see https://github.com/opencobra/cobrapy/issues/760) +- support compression on file handles (depends on solution for + https://github.com/opencobra/cobrapy/issues/812) +""" from __future__ import absolute_import +import datetime +import logging +import os import re -from math import isinf, isnan -from os.path import isfile -from warnings import warn +import traceback +from collections import defaultdict, namedtuple +from copy import deepcopy +from sys import platform +from warnings import catch_warnings, simplefilter -from six import iteritems +from six import iteritems, string_types -from cobra.core import Configuration, Metabolite, Model, Reaction -from cobra.util.solver import set_objective +import cobra +import libsbml +from cobra.core import Gene, Group, Metabolite, Model, Reaction +from cobra.manipulation.validate import check_metabolite_compartment_formula +from cobra.util.solver import linear_reaction_coefficients, set_objective -try: - import libsbml -except ImportError: - libsbml = None +class CobraSBMLError(Exception): + """ SBML error class. """ + pass -CONFIGURATION = Configuration() +LOGGER = logging.getLogger(__name__) +# ----------------------------------------------------------------------------- +# Defaults and constants for writing SBML +# ----------------------------------------------------------------------------- +config = cobra.Configuration() # for default bounds +LOWER_BOUND_ID = "cobra_default_lb" +UPPER_BOUND_ID = "cobra_default_ub" +ZERO_BOUND_ID = "cobra_0_bound" -def parse_legacy_id(the_id, the_compartment=None, the_type='metabolite', - use_hyphens=False): - """Deals with a bunch of problems due to bigg.ucsd.edu not following SBML - standards +BOUND_MINUS_INF = "minus_inf" +BOUND_PLUS_INF = "plus_inf" + + +SBO_FBA_FRAMEWORK = "SBO:0000624" +SBO_DEFAULT_FLUX_BOUND = "SBO:0000626" +SBO_FLUX_BOUND = "SBO:0000625" +SBO_EXCHANGE_REACTION = "SBO:0000627" + +LONG_SHORT_DIRECTION = {'maximize': 'max', 'minimize': 'min'} +SHORT_LONG_DIRECTION = {'min': 'minimize', 'max': 'maximize'} + +Unit = namedtuple('Unit', ['kind', 'scale', 'multiplier', 'exponent']) +UNITS_FLUX = ("mmol_per_gDW_per_hr", + [ + Unit(kind=libsbml.UNIT_KIND_MOLE, scale=-3, multiplier=1, + exponent=1), + Unit(kind=libsbml.UNIT_KIND_GRAM, scale=0, multiplier=1, + exponent=-1), + Unit(kind=libsbml.UNIT_KIND_SECOND, scale=0, multiplier=3600, + exponent=-1) + ]) + +# ----------------------------------------------------------------------------- +# Functions for id replacements (import/export) +# ----------------------------------------------------------------------------- +SBML_DOT = "__SBML_DOT__" + + +def _clip(sid, prefix): + """Clips a prefix from the beginning of a string if it exists.""" + return sid[len(prefix):] if sid.startswith(prefix) else sid + + +def _f_gene(sid, prefix="G_"): + """Clips gene prefix from id.""" + sid = sid.replace(SBML_DOT, ".") + return _clip(sid, prefix) + + +def _f_gene_rev(sid, prefix="G_"): + """Adds gene prefix to id.""" + return prefix + sid.replace(".", SBML_DOT) + + +def _f_specie(sid, prefix="M_"): + """Clips specie/metabolite prefix from id.""" + return _clip(sid, prefix) + + +def _f_specie_rev(sid, prefix="M_"): + """Adds specie/metabolite prefix to id.""" + return prefix + sid + + +def _f_reaction(sid, prefix="R_"): + """Clips reaction prefix from id.""" + return _clip(sid, prefix) + + +def _f_reaction_rev(sid, prefix="R_"): + """Adds reaction prefix to id.""" + return prefix + sid + + +F_GENE = "F_GENE" +F_GENE_REV = "F_GENE_REV" +F_SPECIE = "F_SPECIE" +F_SPECIE_REV = "F_SPECIE_REV" +F_REACTION = "F_REACTION" +F_REACTION_REV = "F_REACTION_REV" + +F_REPLACE = { + F_GENE: _f_gene, + F_GENE_REV: _f_gene_rev, + F_SPECIE: _f_specie, + F_SPECIE_REV: _f_specie_rev, + F_REACTION: _f_reaction, + F_REACTION_REV: _f_reaction_rev, +} + + +# ----------------------------------------------------------------------------- +# Read SBML +# ----------------------------------------------------------------------------- +def read_sbml_model(filename, number=float, f_replace=F_REPLACE, **kwargs): + """Reads SBML model from given filename. + + If the given filename ends with the suffix ''.gz'' (for example, + ''myfile.xml.gz'),' the file is assumed to be compressed in gzip + format and will be automatically decompressed upon reading. Similarly, + if the given filename ends with ''.zip'' or ''.bz2',' the file is + assumed to be compressed in zip or bzip2 format (respectively). Files + whose names lack these suffixes will be read uncompressed. Note that + if the file is in zip format but the archive contains more than one + file, only the first file in the archive will be read and the rest + ignored. + + To read a gzip/zip file, libSBML needs to be configured and linked + with the zlib library at compile time. It also needs to be linked + with the bzip2 library to read files in bzip2 format. (Both of these + are the default configurations for libSBML.) + + This function supports SBML with FBC-v1 and FBC-v2. FBC-v1 models + are converted to FBC-v2 models before reading. + + The parser tries to fall back to information in notes dictionaries + if information is not available in the FBC packages, e.g., + CHARGE, FORMULA on species, or GENE_ASSOCIATION, SUBSYSTEM on reactions. + + Parameters + ---------- + filename : path to SBML file, or SBML string, or SBML file handle + SBML which is read into cobra model + number: data type of stoichiometry: {float, int} + In which data type should the stoichiometry be parsed. + f_replace : dict of replacement functions for id replacement + Dictionary of replacement functions for gene, specie, and reaction. + By default the following id changes are performed on import: + clip G_ from genes, clip M_ from species, clip R_ from reactions + If no replacements should be performed, set f_replace={}, None + + Returns + ------- + cobra.core.Model + + Notes + ----- + Provided file handles cannot be opened in binary mode, i.e., use + with open(path, "r" as f): + read_sbml_model(f) + File handles to compressed files are not supported yet. + """ + try: + doc = _get_doc_from_filename(filename) + return _sbml_to_model(doc, number=number, + f_replace=f_replace, **kwargs) + except IOError as e: + raise e + + except Exception: + LOGGER.error(traceback.print_exc()) + raise CobraSBMLError( + "Something went wrong reading the SBML model. Most likely the SBML" + " model is not valid. Please check that your model is valid using " + "the `cobra.io.sbml.validate_sbml_model` function or via the " + "online validator at http://sbml.org/validator .\n" + "\t`(model, errors) = validate_sbml_model(filename)`" + "\nIf the model is valid and cannot be read please open an issue " + "at https://github.com/opencobra/cobrapy/issues .") + + +def _get_doc_from_filename(filename): + """Get SBMLDocument from given filename. Parameters ---------- - the_id: String. - the_compartment: String - the_type: String - Currently only 'metabolite' is supported - use_hyphens: Boolean - If True, double underscores (__) in an SBML ID will be converted to - hyphens + filename : path to SBML, or SBML string, or filehandle Returns ------- - string: the identifier + libsbml.SBMLDocument """ - if use_hyphens: - the_id = the_id.replace('__', '-') - if the_type == 'metabolite': - if the_id.split('_')[-1] == the_compartment: - # Reformat Ids to match convention in Palsson Lab. - the_id = the_id[:-len(the_compartment) - 1] - the_id += '[%s]' % the_compartment - return the_id + if isinstance(filename, string_types): + if ("win" in platform) and (len(filename) < 260) \ + and os.path.exists(filename): + # path (win) + doc = libsbml.readSBMLFromFile(filename) # noqa: E501 type: libsbml.SBMLDocument + elif ("win" not in platform) and os.path.exists(filename): + # path other + doc = libsbml.readSBMLFromFile(filename) # noqa: E501 type: libsbml.SBMLDocument + else: + # string representation + if "' - end_tag = '
' - if '', '
' - if sbml_level > 2 or (sbml_level == 2 and sbml_version == 4): - note_start_tag, note_end_tag = '', '
', ':' - note_str = note_str.replace('(\'', note_start_tag) - note_str = note_str.replace('\']),', note_end_tag) - note_str = note_str.replace('\',', note_delimiter) - note_str = note_str.replace('\']', '') - note_str = note_str.replace('[\'', '') - note_str = note_str.replace( - '[', '') - note_str = note_str.replace(')]', note_end_tag + '') - sbml_reaction.setNotes(note_str) - - if use_fbc_package: - try: - from libsbml import ConversionProperties, LIBSBML_OPERATION_SUCCESS - conversion_properties = ConversionProperties() - conversion_properties.addOption( - "convert cobra", True, "Convert Cobra model") - result = sbml_doc.convert(conversion_properties) - if result != LIBSBML_OPERATION_SUCCESS: - raise Exception("Conversion of COBRA to SBML+fbc failed") - except Exception as e: - error_string = 'Error saving as SBML+fbc. %s' - try: - # Check whether the FbcExtension is there - from libsbml import FbcExtension - error_string = error_string % e - except ImportError: - error_string = error_string % "FbcExtension not available in " - "libsbml. If use_fbc_package == True then libsbml must be " - "compiled with the fbc extension." - from libsbml import getLibSBMLDottedVersion - _sbml_version = getLibSBMLDottedVersion() - _major, _minor, _patch = map(int, _sbml_version.split('.')) - if _major < 5 or (_major == 5 and _minor < 8): - error_string += "You've got libsbml %s installed. " - "You need 5.8.0 or later with the fbc package" - - raise Exception(error_string) - return sbml_doc - - -def add_sbml_species(sbml_model, cobra_metabolite, note_start_tag, - note_end_tag, boundary_metabolite=False): - """A helper function for adding cobra metabolites to an sbml model. + sref = reaction.createProduct() # noqa: E501 type: libsbml.SpeciesReference + sref.setSpecies(sid) + sref.setStoichiometry(stoichiometry) + sref.setConstant(True) + + # bounds + r_fbc = reaction.getPlugin("fbc") # type: libsbml.FbcReactionPlugin + r_fbc.setLowerFluxBound(_create_bound(model, cobra_reaction, + "lower_bound", + f_replace=f_replace, units=units, + flux_udef=flux_udef)) + r_fbc.setUpperFluxBound(_create_bound(model, cobra_reaction, + "upper_bound", + f_replace=f_replace, units=units, + flux_udef=flux_udef)) + + # GPR + gpr = cobra_reaction.gene_reaction_rule + if gpr is not None and len(gpr) > 0: + gpa = r_fbc.createGeneProductAssociation() # noqa: E501 type: libsbml.GeneProductAssociation + # replace ids + if f_replace and F_GENE_REV in f_replace: + tokens = gpr.split(' ') + for k in range(len(tokens)): + if tokens[k] not in ['and', 'or', '(', ')']: + tokens[k] = f_replace[F_GENE_REV](tokens[k]) + gpr = " ".join(tokens) + + gpa.setAssociation(gpr) + + # objective coefficients + if reaction_coefficients.get(cobra_reaction, 0) != 0: + flux_obj = objective.createFluxObjective() # noqa: E501 type: libsbml.FluxObjective + flux_obj.setReaction(rid) + flux_obj.setCoefficient(cobra_reaction.objective_coefficient) + + # write groups + if len(cobra_model.groups) > 0: + doc.enablePackage( + "http://www.sbml.org/sbml/level3/version1/groups/version1", + "groups", True) + doc.setPackageRequired("groups", False) + model_group = model.getPlugin("groups") # noqa: E501 type: libsbml.GroupsModelPlugin + for cobra_group in cobra_model.groups: + group = model_group.createGroup() # type: libsbml.Group + group.setId(cobra_group.id) + group.setName(cobra_group.name) + group.setKind(cobra_group.kind) + + _sbase_notes_dict(group, cobra_group.notes) + _sbase_annotations(group, cobra_group.annotation) + + for cobra_member in cobra_group.members: + member = group.createMember() # type: libsbml.Member + mid = cobra_member.id + m_type = str(type(cobra_member)) + + # id replacements + if "Reaction" in m_type: + if f_replace and F_REACTION_REV in f_replace: + mid = f_replace[F_REACTION_REV](mid) + if "Metabolite" in m_type: + if f_replace and F_SPECIE_REV in f_replace: + mid = f_replace[F_SPECIE_REV](mid) + if "Gene" in m_type: + if f_replace and F_GENE_REV in f_replace: + mid = f_replace[F_GENE_REV](mid) + + member.setIdRef(mid) + if cobra_member.name and len(cobra_member.name) > 0: + member.setName(cobra_member.name) + + return doc + + +def _create_bound(model, reaction, bound_type, f_replace, units=None, + flux_udef=None): + """Creates bound in model for given reaction. + + Adds the parameters for the bounds to the SBML model. Parameters ---------- - sbml_model: sbml_model object + model : libsbml.Model + SBML model instance + reaction : cobra.core.Reaction + Cobra reaction instance from which the bounds are read. + bound_type : {LOWER_BOUND, UPPER_BOUND} + Type of bound + f_replace : dict of id replacement functions + units : flux units - cobra_metabolite: a cobra.Metabolite object - - note_start_tag: string - the start tag for parsing cobra notes. this will eventually - be supplanted when COBRA is worked into sbml. + Returns + ------- + Id of bound parameter. + """ + value = getattr(reaction, bound_type) + if value == config.lower_bound: + return LOWER_BOUND_ID + elif value == 0: + return ZERO_BOUND_ID + elif value == config.upper_bound: + return UPPER_BOUND_ID + elif value == -float("Inf"): + return BOUND_MINUS_INF + elif value == float("Inf"): + return BOUND_PLUS_INF + else: + # new parameter + rid = reaction.id + if f_replace and F_REACTION_REV in f_replace: + rid = f_replace[F_REACTION_REV](rid) + pid = rid + "_" + bound_type + _create_parameter(model, pid=pid, value=value, sbo=SBO_FLUX_BOUND, + units=units, flux_udef=flux_udef) + return pid + + +def _create_parameter(model, pid, value, sbo=None, constant=True, units=None, + flux_udef=None): + """Create parameter in SBML model.""" + parameter = model.createParameter() # type: libsbml.Parameter + parameter.setId(pid) + parameter.setValue(value) + parameter.setConstant(constant) + if sbo: + parameter.setSBOTerm(sbo) + if units: + parameter.setUnits(flux_udef.getId()) + + +def _check_required(sbase, value, attribute): + """Get required attribute from the SBase. - note_end_tag: string - the end tag for parsing cobra notes. this will eventually - be supplanted when COBRA is worked into sbml. - boundary_metabolite: bool - if metabolite boundary condition should be set or not + Parameters + ---------- + sbase : libsbml.SBase + value : existing value + attribute: name of attribute Returns ------- - string: the created metabolite identifier + attribute value (or value if already set) """ - sbml_species = sbml_model.createSpecies() - the_id = 'M_' + cobra_metabolite.id.replace('-', '__') - # Deal with legacy naming issues - the_compartment = cobra_metabolite.compartment - if the_id.endswith('[%s]' % the_compartment): - the_id = the_id[:-len('[%s]' % the_compartment)] - elif not the_id.endswith('_%s' % the_compartment): - the_id += '_%s' % the_compartment - if boundary_metabolite: - the_id += '_boundary' - sbml_species.setId(the_id) - metabolite_id = the_id - if boundary_metabolite: - sbml_species.setBoundaryCondition(True) - if cobra_metabolite.name: - sbml_species.setName(cobra_metabolite.name) - else: - sbml_species.setName(cobra_metabolite.id) - if the_compartment is not None: - try: - sbml_species.setCompartment(the_compartment) - except: - warn('metabolite failed: ' + the_id) - return cobra_metabolite - if cobra_metabolite.charge is not None: - sbml_species.setCharge(cobra_metabolite.charge) - # Deal with cases where the formula in the model is not an object but as a - # string - if cobra_metabolite.formula or cobra_metabolite.notes: - tmp_note = '' - if hasattr(cobra_metabolite.formula, 'id'): - tmp_note += '%sFORMULA: %s%s' % (note_start_tag, - cobra_metabolite.formula.id, - note_end_tag) + + if not value: + msg = "required attribute '%s' not found in '%s'" % \ + (attribute, sbase) + if hasattr(sbase, "getId") and sbase.getId(): + msg += " with id '%s'" % sbase.getId() + elif hasattr(sbase, "getName") and sbase.getName(): + msg += " with name '%s'" % sbase.getName() + raise CobraSBMLError(msg) + return value + + +def _check(value, message): + """ + Checks the libsbml return value and logs error messages. + + If 'value' is None, logs an error message constructed using + 'message' and then exits with status code 1. If 'value' is an integer, + it assumes it is a libSBML return status code. If the code value is + LIBSBML_OPERATION_SUCCESS, returns without further action; if it is not, + prints an error message constructed using 'message' along with text from + libSBML explaining the meaning of the code, and exits with status code 1. + + """ + if value is None: + LOGGER.error('Error: LibSBML returned a null value trying ' + 'to <' + message + '>.') + elif type(value) is int: + if value == libsbml.LIBSBML_OPERATION_SUCCESS: + return else: - tmp_note += '%sFORMULA: %s%s' % (note_start_tag, - cobra_metabolite.formula, - note_end_tag) - if hasattr(cobra_metabolite.notes, 'items'): - for the_id_type, the_id in cobra_metabolite.notes.items(): - if the_id_type.lower() == 'charge': - # Use of notes['CHARGE'] has been deprecated in favor of - # metabolite.charge - continue - if not isinstance(the_id_type, str): - the_id_type = str(the_id_type) - if hasattr(the_id, '__iter__') and len(the_id) == 1: - the_id = the_id[0] - if not isinstance(the_id, str): - the_id = str(the_id) - tmp_note += '%s%s: %s%s' % (note_start_tag, - the_id_type, - the_id, note_end_tag) - sbml_species.setNotes(tmp_note + '') - return metabolite_id - - -def fix_legacy_id(id, use_hyphens=False, fix_compartments=False): - id = id.replace('_DASH_', '__') - id = id.replace('_FSLASH_', '/') - id = id.replace('_BSLASH_', "\\") - id = id.replace('_LPAREN_', '(') - id = id.replace('_LSQBKT_', '[') - id = id.replace('_RSQBKT_', ']') - id = id.replace('_RPAREN_', ')') - id = id.replace('_COMMA_', ',') - id = id.replace('_PERIOD_', '.') - id = id.replace('_APOS_', "'") - id = id.replace('&', '&') - id = id.replace('<', '<') - id = id.replace('>', '>') - id = id.replace('"', '"') - if use_hyphens: - id = id.replace('__', '-') + LOGGER.error('Error encountered trying to <' + message + '>.') + LOGGER.error('LibSBML error code {}: {}'.format(str(value), + libsbml.OperationReturnValue_toString(value).strip())) + else: + return + + +# ----------------------------------------------------------------------------- +# Notes +# ----------------------------------------------------------------------------- +def _parse_notes_dict(sbase): + """ Creates dictionary of COBRA notes. + + Parameters + ---------- + sbase : libsbml.SBase + + Returns + ------- + dict of notes + """ + notes = sbase.getNotesString() + if notes and len(notes) > 0: + pattern = r"\s*(\w+\s*\w*)\s*:\s*([\w|\s]+)<" + matches = re.findall(pattern, notes) + d = {k.strip(): v.strip() for (k, v) in matches} + return {k: v for k, v in d.items() if len(v) > 0} else: - id = id.replace("-", "__") - if fix_compartments: - if len(id) > 2: - if (id[-3] == "(" and id[-1] == ")") or \ - (id[-3] == "[" and id[-1] == "]"): - id = id[:-3] + "_" + id[-2] - return id - - -def read_legacy_sbml(filename, use_hyphens=False): - """read in an sbml file and fix the sbml id's""" - model = create_cobra_model_from_sbml_file(filename, old_sbml=True) - for metabolite in model.metabolites: - metabolite.id = fix_legacy_id(metabolite.id) - model.metabolites._generate_index() - for reaction in model.reactions: - reaction.id = fix_legacy_id(reaction.id) - if reaction.id.startswith("EX_") and reaction.id.endswith("(e)"): - reaction.id = reaction.id[:-3] + "_e" - model.reactions._generate_index() - # remove boundary metabolites (end in _b and only present in exchanges) - for metabolite in list(model.metabolites): - if not metabolite.id.endswith("_b"): + return {} + + +def _sbase_notes_dict(sbase, notes): + """Set SBase notes based on dictionary. + + Parameters + ---------- + sbase : libsbml.SBase + SBML object to set notes on + notes : notes object + notes information from cobra object + """ + if notes and len(notes) > 0: + tokens = [''] + \ + ["
{}: {}
".format(k, v) for (k, v) in notes.items()] + \ + [""] + _check( + sbase.setNotes("\n".join(tokens)), + "Setting notes on sbase: {}".format(sbase) + ) + + +# ----------------------------------------------------------------------------- +# Annotations +# ----------------------------------------------------------------------------- +""" +cobra annotations will be dictionaries of the form: + object.annotation = { + 'provider' : [(qualifier, entity), ...] + } +A concrete example for a metabolite would look like the following + metabolite.annotation = { + 'chebi': [(isVersionOf, "CHEBI:17234), (is, "CHEBI:4167),], + 'kegg.compound': [(is, "C00031")] + } +The providers are hereby MIRIAM registry keys for collections +https://www.ebi.ac.uk/miriam/main/collections +The qualifiers are biomodel qualifiers +https://co.mbine.org/standards/qualifiers + +In the current stage the new annotation format is not completely supported yet. +""" +URL_IDENTIFIERS_PATTERN = r"^http[s]{0,1}://identifiers.org/(.+)/(.+)" +URL_IDENTIFIERS_PREFIX = r"https://identifiers.org" +QUALIFIER_TYPES = { + "is": libsbml.BQB_IS, + "hasPart": libsbml.BQB_HAS_PART, + "isPartOf": libsbml.BQB_IS_PART_OF, + "isVersionOf": libsbml.BQB_IS_VERSION_OF, + "hasVersion": libsbml.BQB_HAS_VERSION, + "isHomologTo": libsbml.BQB_IS_HOMOLOG_TO, + "isDescribedBy": libsbml.BQB_IS_DESCRIBED_BY, + "isEncodedBy": libsbml.BQB_IS_ENCODED_BY, + "encodes": libsbml.BQB_ENCODES, + "occursIn": libsbml.BQB_OCCURS_IN, + "hasProperty": libsbml.BQB_HAS_PROPERTY, + "isPropertyOf": libsbml.BQB_IS_PROPERTY_OF, + "hasTaxon": libsbml.BQB_HAS_TAXON, + "unknown": libsbml.BQB_UNKNOWN, + "bqm_is": libsbml.BQM_IS, + "bqm_isDescribedBy": libsbml.BQM_IS_DESCRIBED_BY, + "bqm_isDerivedFrom": libsbml.BQM_IS_DERIVED_FROM, + "bqm_isInstanceOf": libsbml.BQM_IS_INSTANCE_OF, + "bqm_hasInstance": libsbml.BQM_HAS_INSTANCE, + "bqm_unknown": libsbml.BQM_UNKNOWN, +} + + +def _parse_annotations(sbase): + """Parses cobra annotations from a given SBase object. + + Annotations are dictionaries with the providers as keys. + + Parameters + ---------- + sbase : libsbml.SBase + SBase from which the SBML annotations are read + + Returns + ------- + dict (annotation dictionary) + + FIXME: annotation format must be updated + (https://github.com/opencobra/cobrapy/issues/684) + """ + annotation = {} + + # SBO term + if sbase.isSetSBOTerm(): + # FIXME: correct handling of annotations + annotation["sbo"] = sbase.getSBOTermID() + + # RDF annotation + cvterms = sbase.getCVTerms() + if cvterms is None: + return annotation + + for cvterm in cvterms: # type: libsbml.CVTerm + for k in range(cvterm.getNumResources()): + uri = cvterm.getResourceURI(k) + + # FIXME: read and store the qualifier + tokens = uri.split('/') + if len(tokens) != 5 or not tokens[2] == "identifiers.org": + LOGGER.warning("%s does not conform to " + "http(s)://identifiers.org/collection/id", uri) + continue + + provider, identifier = tokens[3], tokens[4] + if provider in annotation: + if isinstance(annotation[provider], string_types): + annotation[provider] = [annotation[provider]] + annotation[provider].append(identifier) + else: + # FIXME: always in list + annotation[provider] = identifier + + return annotation + + +def _sbase_annotations(sbase, annotation): + """Set SBase annotations based on cobra annotations. + + Parameters + ---------- + sbase : libsbml.SBase + SBML object to annotate + annotation : cobra annotation structure + cobra object with annotation information + + FIXME: annotation format must be updated + (https://github.com/opencobra/cobrapy/issues/684) + """ + + if not annotation or len(annotation) == 0: + return + + # standardize annotations + annotation_data = deepcopy(annotation) + for key, value in annotation_data.items(): + if isinstance(value, string_types): + annotation_data[key] = [("is", value)] + + for key, value in annotation_data.items(): + for idx, item in enumerate(value): + if isinstance(item, string_types): + value[idx] = ("is", item) + + # set metaId + meta_id = "meta_{}".format(sbase.getId()) + sbase.setMetaId(meta_id) + + # rdf_items = [] + for provider, data in iteritems(annotation_data): + + # set SBOTerm + if provider in ["SBO", "sbo"]: + if provider == "SBO": + logging.warning("'SBO' provider is deprecated, " + "use 'sbo' provider instead") + sbo_term = data[0][1] + _check(sbase.setSBOTerm(sbo_term), + "Setting SBOTerm: {}".format(sbo_term)) + + # FIXME: sbo should also be written as CVTerm continue - if len(metabolite._reaction) == 1: - if list(metabolite._reaction)[0].id.startswith("EX_"): - metabolite.remove_from_model() - model.metabolites._generate_index() - return model + + for item in data: + qualifier_str, entity = item[0], item[1] + qualifier = QUALIFIER_TYPES.get(qualifier_str, None) + if qualifier is None: + qualifier = libsbml.BQB_IS + LOGGER.error("Qualifier type is not supported on " + "annotation: '{}'".format(qualifier_str)) + + qualifier_type = libsbml.BIOLOGICAL_QUALIFIER + if qualifier_str.startswith("bqm_"): + qualifier_type = libsbml.MODEL_QUALIFIER + + cv = libsbml.CVTerm() # type: libsbml.CVTerm + cv.setQualifierType(qualifier_type) + if qualifier_type == libsbml.BIOLOGICAL_QUALIFIER: + cv.setBiologicalQualifierType(qualifier) + elif qualifier_type == libsbml.MODEL_QUALIFIER: + cv.setModelQualifierType(qualifier) + else: + raise CobraSBMLError('Unsupported qualifier: ' + '%s' % qualifier) + resource = "%s/%s/%s" % (URL_IDENTIFIERS_PREFIX, provider, entity) + cv.addResource(resource) + _check(sbase.addCVTerm(cv), + "Setting cvterm: {}, resource: {}".format(cv, resource)) + + +# ----------------------------------------------------------------------------- +# Validation +# ----------------------------------------------------------------------------- +def validate_sbml_model(filename, + check_model=True, + internal_consistency=True, + check_units_consistency=False, + check_modeling_practice=False): + """Validate SBML model and returns the model along with a list of errors. + + Parameters + ---------- + filename : str + The filename (or SBML string) of the SBML model to be validated. + internal_consistency: boolean {True, False} + Check internal consistency. + check_units_consistency: boolean {True, False} + Check consistency of units. + check_modeling_practice: boolean {True, False} + Check modeling practise. + check_model: boolean {True, False} + Whether to also check some basic model properties such as reaction + boundaries and compartment formulas. + + Returns + ------- + (model, errors) + model : :class:`~cobra.core.Model.Model` object + The cobra model if the file could be read successfully or None + otherwise. + errors : dict + Warnings and errors grouped by their respective types. + + Raises + ------ + CobraSBMLError + """ + # store errors + errors = {key: [] for key in ("validator", "warnings", "other", + "SBML errors")} + + for key in ["SBML_FATAL", "SBML ERROR", "SBML_SCHEMA_ERROR", + "SBML_WARNING"]: + errors[key] = [] + + # make sure there is exactly one model + doc = _get_doc_from_filename(filename) + model = doc.getModel() # type: libsbml.Model + if model is None: + raise CobraSBMLError("No SBML model detected in file.") + + # set checking of units & modeling practise + doc.setConsistencyChecks(libsbml.LIBSBML_CAT_UNITS_CONSISTENCY, + check_units_consistency) + doc.setConsistencyChecks(libsbml.LIBSBML_CAT_MODELING_PRACTICE, + check_modeling_practice) + + # check internal consistency + if internal_consistency: + doc.checkInternalConsistency() + doc.checkConsistency() + + for k in range(doc.getNumErrors()): + e = doc.getError(k) + msg = _error_string(e, k=k) + sev = e.getSeverity() + if sev == libsbml.LIBSBML_SEV_FATAL: + errors["SBML_FATAL"].append(msg) + elif sev == libsbml.LIBSBML_SEV_ERROR: + errors["SBML_ERROR"].append(msg) + elif sev == libsbml.LIBSBML_SEV_SCHEMA_ERROR: + errors["SBML_SCHEMA_ERROR"].append(msg) + elif sev == libsbml.LIBSBML_SEV_WARNING: + errors["SBML_WARNING"].append(msg) + + # ensure can be made into model + # all warnings generated while loading will be logged as errors + with catch_warnings(record=True) as warning_list: + simplefilter("always") + try: + model = _sbml_to_model(doc) + except CobraSBMLError as e: + errors["SBML errors"].append(str(e)) + return None, errors + except Exception as e: + errors["other"].append(str(e)) + return None, errors + + errors["warnings"].extend(str(i.message) for i in warning_list) + + if check_model: + errors["validator"].extend(check_metabolite_compartment_formula(model)) + + return model, errors + + +def _error_string(error, k=None): + """String representation of SBMLError. + + Parameters + ---------- + error : libsbml.SBMLError + k : index of error + + Returns + ------- + string representation of error + """ + package = error.getPackage() + if package == '': + package = 'core' + + error_str = '{}\n' \ + 'E{}: {} ({}, L{}, {})\n' \ + '{}\n' \ + '[{}] {}\n' \ + '{}'.format( + '-' * 60, + k, error.getCategoryAsString(), package, error.getLine(), + 'code', + '-' * 60, + error.getSeverityAsString(), error.getShortMessage(), + error.getMessage()) + return error_str diff --git a/cobra/io/sbml3.py b/cobra/io/sbml3.py deleted file mode 100644 index 3433048ed..000000000 --- a/cobra/io/sbml3.py +++ /dev/null @@ -1,759 +0,0 @@ -# -*- coding: utf-8 -*- - -from __future__ import absolute_import - -import re -from ast import And, BoolOp, Name, Or -from bz2 import BZ2File -from collections import defaultdict -from decimal import Decimal -from gzip import GzipFile -from tempfile import NamedTemporaryFile -from warnings import catch_warnings, simplefilter, warn - -from six import iteritems, string_types - -from cobra.core import Configuration, Gene, Metabolite, Model, Reaction -from cobra.core.gene import parse_gpr -from cobra.manipulation.modify import _renames -from cobra.manipulation.validate import check_metabolite_compartment_formula -from cobra.util.solver import set_objective - - -try: - from lxml.etree import ( - parse, Element, SubElement, ElementTree, register_namespace, - ParseError, XPath) - - _with_lxml = True -except ImportError: - _with_lxml = False - try: - from xml.etree.cElementTree import ( - parse, Element, SubElement, ElementTree, register_namespace, - ParseError) - except ImportError: - XPath = None - from xml.etree.ElementTree import ( - parse, Element, SubElement, ElementTree, register_namespace, - ParseError) - -# use sbml level 2 from sbml.py (which uses libsbml). Eventually, it would -# be nice to use the libSBML converters directly instead. -try: - import libsbml -except ImportError: - libsbml = None -else: - from cobra.io.sbml import create_cobra_model_from_sbml_file as read_sbml2 - from cobra.io.sbml import write_cobra_model_to_sbml_file as write_sbml2 - -try: - from optlang.symbolics import Basic -except ImportError: - class Basic: - pass - - -CONFIGURATION = Configuration() - -# deal with namespaces -namespaces = {"fbc": "http://www.sbml.org/sbml/level3/version1/fbc/version2", - "sbml": "http://www.sbml.org/sbml/level3/version1/core", - "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#", - "bqbiol": "http://biomodels.net/biology-qualifiers/"} -for key in namespaces: - register_namespace(key, namespaces[key]) - - -def ns(query): - """replace prefixes with namespace""" - for prefix, uri in iteritems(namespaces): - query = query.replace(prefix + ":", "{" + uri + "}") - return query - - -# XPATH query wrappers -fbc_prefix = "{" + namespaces["fbc"] + "}" -sbml_prefix = "{" + namespaces["sbml"] + "}" - -SBML_DOT = "__SBML_DOT__" -# FBC TAGS -OR_TAG = ns("fbc:or") -AND_TAG = ns("fbc:and") -GENEREF_TAG = ns("fbc:geneProductRef") -GPR_TAG = ns("fbc:geneProductAssociation") -GENELIST_TAG = ns("fbc:listOfGeneProducts") -GENE_TAG = ns("fbc:geneProduct") -# XPATHS -BOUND_XPATH = ns("sbml:listOfParameters/sbml:parameter[@value]") -COMPARTMENT_XPATH = ns("sbml:listOfCompartments/sbml:compartment") -GENES_XPATH = GENELIST_TAG + "/" + GENE_TAG -SPECIES_XPATH = ns("sbml:listOfSpecies/sbml:species[@boundaryCondition='%s']") -OBJECTIVES_XPATH = ns("fbc:objective[@fbc:id='%s']/" - "fbc:listOfFluxObjectives/" - "fbc:fluxObjective") -LONG_SHORT_DIRECTION = {'maximize': 'max', 'minimize': 'min'} -SHORT_LONG_DIRECTION = {'min': 'minimize', 'max': 'maximize'} - -if _with_lxml: - RDF_ANNOTATION_XPATH = ("sbml:annotation/rdf:RDF/" - "rdf:Description[@rdf:about=$metaid]/" - "*[self::bqbiol:isEncodedBy or self::bqbiol:is]/" - "rdf:Bag/rdf:li/@rdf:resource") - extract_rdf_annotation = XPath(RDF_ANNOTATION_XPATH, namespaces=namespaces, - smart_strings=False) -else: - RDF_ANNOTATION_XPATH = ns("sbml:annotation/rdf:RDF/" - "rdf:Description[@rdf:about='%s']/" - "bqbiol:isEncodedBy/" - "rdf:Bag/rdf:li[@rdf:resource]") - - def extract_rdf_annotation(sbml_element, metaid): - search_xpath = RDF_ANNOTATION_XPATH % metaid - for i in sbml_element.iterfind(search_xpath): - yield get_attrib(i, "rdf:resource") - for i in sbml_element.iterfind(search_xpath.replace( - "isEncodedBy", "is")): - yield get_attrib(i, "rdf:resource") - - -class CobraSBMLError(Exception): - pass - - -def get_attrib(tag, attribute, type=lambda x: x, require=False): - value = tag.get(ns(attribute)) - if require and value is None: - msg = "required attribute '%s' not found in tag '%s'" % \ - (attribute, tag.tag) - if tag.get("id") is not None: - msg += " with id '%s'" % tag.get("id") - elif tag.get("name") is not None: - msg += " with name '%s'" % tag.get("name") - raise CobraSBMLError(msg) - return type(value) if value is not None else None - - -def set_attrib(xml, attribute_name, value): - if value is None or value == "": - return - xml.set(ns(attribute_name), str(value)) - - -def parse_stream(filename): - """parses filename or compressed stream to xml""" - try: - if hasattr(filename, "read"): - return parse(filename) - elif filename.endswith(".gz"): - with GzipFile(filename) as infile: - return parse(infile) - elif filename.endswith(".bz2"): - with BZ2File(filename) as infile: - return parse(infile) - else: - return parse(filename) - except ParseError as e: - raise CobraSBMLError("Malformed XML file: " + str(e)) - - -# string utility functions -def clip(string, prefix): - """clips a prefix from the beginning of a string if it exists - - >>> clip("R_pgi", "R_") - "pgi" - - """ - return string[len(prefix):] if string.startswith(prefix) else string - - -def strnum(number): - """Utility function to convert a number to a string""" - if isinstance(number, (Decimal, Basic, str)): - return str(number) - s = "%.15g" % number - return s.rstrip(".") - - -def construct_gpr_xml(parent, expression): - """create gpr xml under parent node""" - if isinstance(expression, BoolOp): - op = expression.op - if isinstance(op, And): - new_parent = SubElement(parent, AND_TAG) - elif isinstance(op, Or): - new_parent = SubElement(parent, OR_TAG) - else: - raise Exception("unsupported operation " + op.__class__) - for arg in expression.values: - construct_gpr_xml(new_parent, arg) - elif isinstance(expression, Name): - gene_elem = SubElement(parent, GENEREF_TAG) - set_attrib(gene_elem, "fbc:geneProduct", "G_" + expression.id) - else: - raise Exception("unsupported operation " + repr(expression)) - - -def annotate_cobra_from_sbml(cobra_element, sbml_element): - sbo_term = sbml_element.get("sboTerm") - if sbo_term is not None: - cobra_element.annotation["sbo"] = sbo_term - meta_id = get_attrib(sbml_element, "metaid") - if meta_id is None: - return - annotation = cobra_element.annotation - for uri in extract_rdf_annotation(sbml_element, metaid="#" + meta_id): - if not uri.startswith("http://identifiers.org/"): - warn("%s does not start with http://identifiers.org/" % uri) - continue - try: - provider, identifier = uri[23:].split("/", 1) - except ValueError: - warn("%s does not conform to http://identifiers.org/provider/id" - % uri) - continue - # handle multiple id's in the same database - if provider in annotation: - # make into a list if necessary - if isinstance(annotation[provider], string_types): - annotation[provider] = [annotation[provider]] - annotation[provider].append(identifier) - else: - cobra_element.annotation[provider] = identifier - - -def annotate_sbml_from_cobra(sbml_element, cobra_element): - if len(cobra_element.annotation) == 0: - return - # get the id so we can set the metaid - tag = sbml_element.tag - if tag.startswith(sbml_prefix) or tag[0] != "{": - prefix = "" - elif tag.startswith(fbc_prefix): - prefix = fbc_prefix - else: - raise ValueError("Can not annotate " + repr(sbml_element)) - id = sbml_element.get(prefix + "id") - if len(id) == 0: - raise ValueError("%s does not have id set" % repr(sbml_element)) - set_attrib(sbml_element, "metaid", id) - annotation = SubElement(sbml_element, ns("sbml:annotation")) - rdf_desc = SubElement(SubElement(annotation, ns("rdf:RDF")), - ns("rdf:Description")) - set_attrib(rdf_desc, "rdf:about", "#" + id) - bag = SubElement(SubElement(rdf_desc, ns("bqbiol:is")), - ns("rdf:Bag")) - for provider, identifiers in sorted(iteritems(cobra_element.annotation)): - if provider == "sbo": - set_attrib(sbml_element, "sboTerm", identifiers) - continue - if isinstance(identifiers, string_types): - identifiers = (identifiers,) - for identifier in identifiers: - li = SubElement(bag, ns("rdf:li")) - set_attrib(li, "rdf:resource", "http://identifiers.org/%s/%s" % - (provider, identifier)) - - -def parse_xml_into_model(xml, number=float): - xml_model = xml.find(ns("sbml:model")) - if get_attrib(xml_model, "fbc:strict") != "true": - warn('loading SBML model without fbc:strict="true"') - - model_id = get_attrib(xml_model, "id") - model = Model(model_id) - model.name = xml_model.get("name") - - model.compartments = {c.get("id"): c.get("name") for c in - xml_model.findall(COMPARTMENT_XPATH)} - # add metabolites - for species in xml_model.findall(SPECIES_XPATH % 'false'): - met = get_attrib(species, "id", require=True) - met = Metabolite(clip(met, "M_")) - met.name = species.get("name") - annotate_cobra_from_sbml(met, species) - met.compartment = species.get("compartment") - met.charge = get_attrib(species, "fbc:charge", int) - met.formula = get_attrib(species, "fbc:chemicalFormula") - model.add_metabolites([met]) - # Detect boundary metabolites - In case they have been mistakenly - # added. They should not actually appear in a model - boundary_metabolites = {clip(i.get("id"), "M_") - for i in xml_model.findall(SPECIES_XPATH % 'true')} - - # add genes - for sbml_gene in xml_model.iterfind(GENES_XPATH): - gene_id = get_attrib(sbml_gene, "fbc:id").replace(SBML_DOT, ".") - gene = Gene(clip(gene_id, "G_")) - gene.name = get_attrib(sbml_gene, "fbc:name") - if gene.name is None: - gene.name = get_attrib(sbml_gene, "fbc:label") - annotate_cobra_from_sbml(gene, sbml_gene) - model.genes.append(gene) - - def process_gpr(sub_xml): - """recursively convert gpr xml to a gpr string""" - if sub_xml.tag == OR_TAG: - return "( " + ' or '.join(process_gpr(i) for i in sub_xml) + " )" - elif sub_xml.tag == AND_TAG: - return "( " + ' and '.join(process_gpr(i) for i in sub_xml) + " )" - elif sub_xml.tag == GENEREF_TAG: - gene_id = get_attrib(sub_xml, "fbc:geneProduct", require=True) - return clip(gene_id, "G_") - else: - raise Exception("unsupported tag " + sub_xml.tag) - - bounds = {bound.get("id"): get_attrib(bound, "value", type=number) - for bound in xml_model.iterfind(BOUND_XPATH)} - # add reactions - reactions = [] - for sbml_reaction in xml_model.iterfind( - ns("sbml:listOfReactions/sbml:reaction")): - reaction = get_attrib(sbml_reaction, "id", require=True) - reaction = Reaction(clip(reaction, "R_")) - reaction.name = sbml_reaction.get("name") - annotate_cobra_from_sbml(reaction, sbml_reaction) - lb_id = get_attrib(sbml_reaction, "fbc:lowerFluxBound", require=True) - ub_id = get_attrib(sbml_reaction, "fbc:upperFluxBound", require=True) - try: - reaction.upper_bound = bounds[ub_id] - reaction.lower_bound = bounds[lb_id] - except KeyError as e: - raise CobraSBMLError("No constant bound with id '%s'" % str(e)) - reactions.append(reaction) - - stoichiometry = defaultdict(lambda: 0) - for species_reference in sbml_reaction.findall( - ns("sbml:listOfReactants/sbml:speciesReference")): - met_name = clip(species_reference.get("species"), "M_") - stoichiometry[met_name] -= \ - number(species_reference.get("stoichiometry")) - for species_reference in sbml_reaction.findall( - ns("sbml:listOfProducts/sbml:speciesReference")): - met_name = clip(species_reference.get("species"), "M_") - stoichiometry[met_name] += \ - get_attrib(species_reference, "stoichiometry", - type=number, require=True) - # needs to have keys of metabolite objects, not ids - object_stoichiometry = {} - for met_id in stoichiometry: - if met_id in boundary_metabolites: - warn("Boundary metabolite '%s' used in reaction '%s'" % - (met_id, reaction.id)) - continue - try: - metabolite = model.metabolites.get_by_id(met_id) - except KeyError: - warn("ignoring unknown metabolite '%s' in reaction %s" % - (met_id, reaction.id)) - continue - object_stoichiometry[metabolite] = stoichiometry[met_id] - reaction.add_metabolites(object_stoichiometry) - # set gene reaction rule - gpr_xml = sbml_reaction.find(GPR_TAG) - if gpr_xml is not None and len(gpr_xml) != 1: - warn("ignoring invalid geneAssociation for " + repr(reaction)) - gpr_xml = None - gpr = process_gpr(gpr_xml[0]) if gpr_xml is not None else '' - # remove outside parenthesis, if any - if gpr.startswith("(") and gpr.endswith(")"): - gpr = gpr[1:-1].strip() - gpr = gpr.replace(SBML_DOT, ".") - reaction.gene_reaction_rule = gpr - try: - model.add_reactions(reactions) - except ValueError as e: - warn(str(e)) - - # objective coefficients are handled after all reactions are added - obj_list = xml_model.find(ns("fbc:listOfObjectives")) - if obj_list is None: - warn("listOfObjectives element not found") - return model - target_objective_id = get_attrib(obj_list, "fbc:activeObjective") - target_objective = obj_list.find( - ns("fbc:objective[@fbc:id='{}']".format(target_objective_id))) - obj_direction_long = get_attrib(target_objective, "fbc:type") - obj_direction = LONG_SHORT_DIRECTION[obj_direction_long] - - obj_query = OBJECTIVES_XPATH % target_objective_id - coefficients = {} - for sbml_objective in obj_list.findall(obj_query): - rxn_id = clip(get_attrib(sbml_objective, "fbc:reaction"), "R_") - try: - objective_reaction = model.reactions.get_by_id(rxn_id) - except KeyError: - raise CobraSBMLError("Objective reaction '%s' not found" % rxn_id) - try: - coefficients[objective_reaction] = get_attrib( - sbml_objective, "fbc:coefficient", type=number) - except ValueError as e: - warn(str(e)) - set_objective(model, coefficients) - model.solver.objective.direction = obj_direction - return model - - -def model_to_xml(cobra_model, units=True): - xml = Element("sbml", xmlns=namespaces["sbml"], level="3", version="1", - sboTerm="SBO:0000624") - set_attrib(xml, "fbc:required", "false") - xml_model = SubElement(xml, "model") - set_attrib(xml_model, "fbc:strict", "true") - if cobra_model.id is not None: - xml_model.set("id", cobra_model.id) - if cobra_model.name is not None: - xml_model.set("name", cobra_model.name) - - # if using units, add in mmol/gdw/hr - if units: - unit_def = SubElement( - SubElement(xml_model, "listOfUnitDefinitions"), - "unitDefinition", id="mmol_per_gDW_per_hr") - list_of_units = SubElement(unit_def, "listOfUnits") - SubElement(list_of_units, "unit", kind="mole", scale="-3", - multiplier="1", exponent="1") - SubElement(list_of_units, "unit", kind="gram", scale="0", - multiplier="1", exponent="-1") - SubElement(list_of_units, "unit", kind="second", scale="0", - multiplier="3600", exponent="-1") - - # create the element for the flux objective - obj_list_tmp = SubElement(xml_model, ns("fbc:listOfObjectives")) - set_attrib(obj_list_tmp, "fbc:activeObjective", "obj") - obj_list_tmp = SubElement(obj_list_tmp, ns("fbc:objective")) - set_attrib(obj_list_tmp, "fbc:id", "obj") - set_attrib(obj_list_tmp, "fbc:type", - SHORT_LONG_DIRECTION[cobra_model.objective.direction]) - flux_objectives_list = SubElement(obj_list_tmp, - ns("fbc:listOfFluxObjectives")) - - # create the element for the flux bound parameters - parameter_list = SubElement(xml_model, "listOfParameters") - param_attr = {"constant": "true"} - if units: - param_attr["units"] = "mmol_per_gDW_per_hr" - # the most common bounds are the minimum, maximum, and 0 - if len(cobra_model.reactions) > 0: - min_value = min(cobra_model.reactions.list_attr("lower_bound")) - max_value = max(cobra_model.reactions.list_attr("upper_bound")) - else: - min_value = CONFIGURATION.lower_bound - max_value = CONFIGURATION.upper_bound - - SubElement(parameter_list, "parameter", value=strnum(min_value), - id="cobra_default_lb", sboTerm="SBO:0000626", **param_attr) - SubElement(parameter_list, "parameter", value=strnum(max_value), - id="cobra_default_ub", sboTerm="SBO:0000626", **param_attr) - SubElement(parameter_list, "parameter", value="0", - id="cobra_0_bound", sboTerm="SBO:0000626", **param_attr) - - def create_bound(reaction, bound_type): - """returns the str id of the appropriate bound for the reaction - - The bound will also be created if necessary""" - value = getattr(reaction, bound_type) - if value == min_value: - return "cobra_default_lb" - elif value == 0: - return "cobra_0_bound" - elif value == max_value: - return "cobra_default_ub" - else: - param_id = "R_" + reaction.id + "_" + bound_type - SubElement(parameter_list, "parameter", id=param_id, - value=strnum(value), sboTerm="SBO:0000625", - **param_attr) - return param_id - - # add in compartments - compartments_list = SubElement(xml_model, "listOfCompartments") - compartments = cobra_model.compartments - for compartment, name in iteritems(compartments): - SubElement(compartments_list, "compartment", id=compartment, name=name, - constant="true") - - # add in metabolites - species_list = SubElement(xml_model, "listOfSpecies") - for met in cobra_model.metabolites: - species = SubElement(species_list, "species", - id="M_" + met.id, - # Useless required SBML parameters - constant="false", - boundaryCondition="false", - hasOnlySubstanceUnits="false") - set_attrib(species, "name", met.name) - annotate_sbml_from_cobra(species, met) - set_attrib(species, "compartment", met.compartment) - set_attrib(species, "fbc:charge", met.charge) - set_attrib(species, "fbc:chemicalFormula", met.formula) - - # add in genes - if len(cobra_model.genes) > 0: - genes_list = SubElement(xml_model, GENELIST_TAG) - for gene in cobra_model.genes: - gene_id = gene.id.replace(".", SBML_DOT) - sbml_gene = SubElement(genes_list, GENE_TAG) - set_attrib(sbml_gene, "fbc:id", "G_" + gene_id) - name = gene.name - if name is None or len(name) == 0: - name = gene.id - set_attrib(sbml_gene, "fbc:label", gene_id) - set_attrib(sbml_gene, "fbc:name", name) - annotate_sbml_from_cobra(sbml_gene, gene) - - # add in reactions - reactions_list = SubElement(xml_model, "listOfReactions") - for reaction in cobra_model.reactions: - id = "R_" + reaction.id - sbml_reaction = SubElement( - reactions_list, "reaction", - id=id, - # Useless required SBML parameters - fast="false", - reversible=str(reaction.lower_bound < 0).lower()) - set_attrib(sbml_reaction, "name", reaction.name) - annotate_sbml_from_cobra(sbml_reaction, reaction) - # add in bounds - set_attrib(sbml_reaction, "fbc:upperFluxBound", - create_bound(reaction, "upper_bound")) - set_attrib(sbml_reaction, "fbc:lowerFluxBound", - create_bound(reaction, "lower_bound")) - - # objective coefficient - if reaction.objective_coefficient != 0: - objective = SubElement(flux_objectives_list, - ns("fbc:fluxObjective")) - set_attrib(objective, "fbc:reaction", id) - set_attrib(objective, "fbc:coefficient", - strnum(reaction.objective_coefficient)) - - # stoichiometry - reactants = {} - products = {} - for metabolite, stoichiomety in iteritems(reaction._metabolites): - met_id = "M_" + metabolite.id - if stoichiomety > 0: - products[met_id] = strnum(stoichiomety) - else: - reactants[met_id] = strnum(-stoichiomety) - if len(reactants) > 0: - reactant_list = SubElement(sbml_reaction, "listOfReactants") - for met_id, stoichiomety in sorted(iteritems(reactants)): - SubElement(reactant_list, "speciesReference", species=met_id, - stoichiometry=stoichiomety, constant="true") - if len(products) > 0: - product_list = SubElement(sbml_reaction, "listOfProducts") - for met_id, stoichiomety in sorted(iteritems(products)): - SubElement(product_list, "speciesReference", species=met_id, - stoichiometry=stoichiomety, constant="true") - - # gene reaction rule - gpr = reaction.gene_reaction_rule - if gpr is not None and len(gpr) > 0: - gpr = gpr.replace(".", SBML_DOT) - gpr_xml = SubElement(sbml_reaction, GPR_TAG) - try: - parsed, _ = parse_gpr(gpr) - construct_gpr_xml(gpr_xml, parsed.body) - except Exception as e: - print("failed on '%s' in %s" % - (reaction.gene_reaction_rule, repr(reaction))) - raise e - - return xml - - -def read_sbml_model(filename, number=float, **kwargs): - if not _with_lxml: - warn("Install lxml for faster SBML I/O", ImportWarning) - xmlfile = parse_stream(filename) - xml = xmlfile.getroot() - # use libsbml if not l3v1 with fbc v2 - use_libsbml = (xml.get("level") != "3" or xml.get("version") != "1" or - get_attrib(xml, "fbc:required") is None) - if use_libsbml: - if libsbml is None: - raise ImportError("libSBML required for fbc < 2") - # libsbml needs a file string, so write to temp file if a file handle - if hasattr(filename, "read"): - with NamedTemporaryFile(suffix=".xml", delete=False) as outfile: - xmlfile.write(outfile, encoding="UTF-8", xml_declaration=True) - filename = outfile.name - return read_sbml2(filename, **kwargs) - try: - return parse_xml_into_model(xml, number=number) - except Exception: - raise CobraSBMLError( - "Something went wrong reading the model. You can get a detailed " - "report using the `cobra.io.sbml3.validate_sbml_model` function " - "or using the online validator at http://sbml.org/validator") - - -id_required = {ns(i) for i in ("sbml:model", "sbml:reaction:", "sbml:species", - "fbc:geneProduct", "sbml:compartment", - "sbml:parameter", "sbml:UnitDefinition", - "fbc:objective")} -invalid_id_detector = re.compile("|".join(re.escape(i[0]) for i in _renames)) - - -def validate_sbml_model(filename, check_model=True): - """Returns the model along with a list of errors. - - Parameters - ---------- - filename : str - The filename of the SBML model to be validated. - check_model: bool, optional - Whether to also check some basic model properties such as reaction - boundaries and compartment formulas. - - Returns - ------- - model : :class:`~cobra.core.Model.Model` object - The cobra model if the file could be read succesfully or None - otherwise. - errors : dict - Warnings and errors grouped by their respective types. - - Raises - ------ - CobraSBMLError - If the file is not a valid SBML Level 3 file with FBC. - """ - xmlfile = parse_stream(filename) - xml = xmlfile.getroot() - # use libsbml if not l3v1 with fbc v2 - use_libsbml = (xml.get("level") != "3" or xml.get("version") != "1" or - get_attrib(xml, "fbc:required") is None) - if use_libsbml: - raise CobraSBMLError("XML is not SBML level 3 v1 with fbc v2") - - errors = {k: [] for k in ("validator", "warnings", "SBML errors", "other")} - - def err(err_msg, group="validator"): - errors[group].append(err_msg) - - # make sure there is exactly one model - xml_models = xml.findall(ns("sbml:model")) - if len(xml_models) == 0: - raise CobraSBMLError("No SBML model detected in file") - if len(xml_models) > 1: - err("More than 1 SBML model detected in file") - xml_model = xml_models[0] - - # make sure all sbml id's are valid - all_ids = set() - for element in xmlfile.iter(): - if element.tag.startswith(fbc_prefix): - prefix = fbc_prefix - elif element.tag.startswith(sbml_prefix): - prefix = "" - else: - continue - str_id = element.get(prefix + "id", None) - element_name = element.tag.split("}")[-1] - id_repr = "%s id '%s' " % (element_name, str_id) - if str_id is None or len(str_id) == 0: - if element.tag in id_required: - err(element_name + " missing id") - else: - if str_id in all_ids: - err("duplicate id for " + id_repr) - all_ids.add(str_id) - try: - str_id.encode("ascii") - except UnicodeEncodeError as e: - err("invalid character '%s' found in %s" % - (str_id[e.start:e.end], id_repr)) - if invalid_id_detector.search(str_id): - bad_chars = "".join(invalid_id_detector.findall(str_id)) - err("invalid character%s '%s' found in %s" % - ("s" if len(bad_chars) > 1 else "", bad_chars, id_repr)) - if not str_id[0].isalpha(): - err("%s does not start with alphabet character" % id_repr) - - # check SBO terms - for element in xml.findall(".//*[@sboTerm]"): - sbo_term = element.get("sboTerm") - if not sbo_term.startswith("SBO:"): - err("sboTerm '%s' does not begin with 'SBO:'" % sbo_term) - - # ensure can be made into model - # all warnings generated while loading will be logged as errors - with catch_warnings(record=True) as warning_list: - simplefilter("always") - try: - model = parse_xml_into_model(xml) - except CobraSBMLError as e: - err(str(e), "SBML errors") - return None, errors - except Exception as e: - err(str(e), "other") - return None, errors - errors["warnings"].extend(str(i.message) for i in warning_list) - - # check genes - xml_genes = { - get_attrib(i, "fbc:id").replace(SBML_DOT, ".") - for i in xml_model.iterfind(GENES_XPATH)} - for gene in model.genes: - if "G_" + gene.id not in xml_genes and gene.id not in xml_genes: - err("No gene specfied with id 'G_%s'" % gene.id) - - if check_model: - errors["validator"].extend(check_metabolite_compartment_formula(model)) - - return model, errors - - -def write_sbml_model(cobra_model, filename, use_fbc_package=True, **kwargs): - if not _with_lxml: - warn("Install lxml for faster SBML I/O", ImportWarning) - if not use_fbc_package: - if libsbml is None: - raise ImportError("libSBML required to write non-fbc models") - write_sbml2(cobra_model, filename, use_fbc_package=False, **kwargs) - return - # create xml - xml = model_to_xml(cobra_model, **kwargs) - write_args = {"encoding": "UTF-8", "xml_declaration": True} - if _with_lxml: - write_args["pretty_print"] = True - write_args["pretty_print"] = True - else: - indent_xml(xml) - # write xml to file - should_close = True - if hasattr(filename, "write"): - xmlfile = filename - should_close = False - elif filename.endswith(".gz"): - xmlfile = GzipFile(filename, "wb") - elif filename.endswith(".bz2"): - xmlfile = BZ2File(filename, "wb") - else: - xmlfile = open(filename, "wb") - ElementTree(xml).write(xmlfile, **write_args) - if should_close: - xmlfile.close() - - -# inspired by http://effbot.org/zone/element-lib.htm#prettyprint -def indent_xml(elem, level=0): - """indent xml for pretty printing""" - i = "\n" + level * " " - if len(elem): - if not elem.text or not elem.text.strip(): - elem.text = i + " " - if not elem.tail or not elem.tail.strip(): - elem.tail = i - for elem in elem: - indent_xml(elem, level + 1) - if not elem.tail or not elem.tail.strip(): - elem.tail = i - else: - if level and (not elem.tail or not elem.tail.strip()): - elem.tail = i diff --git a/cobra/manipulation/delete.py b/cobra/manipulation/delete.py index 6650aa3d7..de7d05453 100644 --- a/cobra/manipulation/delete.py +++ b/cobra/manipulation/delete.py @@ -230,4 +230,8 @@ def remove_genes(cobra_model, gene_list, remove_reactions=True): reaction.gene_reaction_rule = new_rule for gene in gene_set: cobra_model.genes.remove(gene) + # remove reference to the gene in all groups + associated_groups = cobra_model.get_associated_groups(gene) + for group in associated_groups: + group.remove_members(gene) cobra_model.remove_reactions(target_reactions) diff --git a/cobra/medium/boundary_types.py b/cobra/medium/boundary_types.py index 2cc3249bb..3535c1219 100644 --- a/cobra/medium/boundary_types.py +++ b/cobra/medium/boundary_types.py @@ -102,7 +102,11 @@ def is_boundary_type(reaction, boundary_type, external_compartment): on a heuristic. """ # Check if the reaction has an annotation. Annotations dominate everything. - sbo_term = reaction.annotation.get("sbo", "").upper() + sbo_term = reaction.annotation.get("sbo", "") + if isinstance(sbo_term, list): + sbo_term = sbo_term[0] + sbo_term = sbo_term.upper() + if sbo_term == sbo_terms[boundary_type]: return True if sbo_term in [sbo_terms[k] for k in sbo_terms if k != boundary_type]: diff --git a/cobra/test/data/e_coli_core.xml b/cobra/test/data/e_coli_core.xml new file mode 100644 index 000000000..a50651aae --- /dev/null +++ b/cobra/test/data/e_coli_core.xml @@ -0,0 +1,8293 @@ + + +This is a metabolism model of Escherichia coli str. K-12 substr. MG1655 in + SBML format.
+Redistribution and use of any part of this model from BiGG Models knowledge-base, with or without modification, are permitted provided that the following conditions are met: +
For specific licensing terms about this particular model and regulations of commercial use, see + this model in BiGG Models knowledge-base.
+