# -*- coding: utf-8 -*-
# Copyright 2017 Novo Nordisk Foundation Center for Biosustainability,
# Technical University of Denmark.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Supporting functions for stoichiometric consistency checks."""
import logging
import multiprocessing
from operator import attrgetter
import numpy as np
from cobra import Configuration, Reaction
from cobra.exceptions import Infeasible
from cobra.flux_analysis import flux_variability_analysis
from optlang.interface import INFEASIBLE, OPTIMAL
from optlang.symbolics import Zero
import memote.support.consistency_helpers as con_helpers
import memote.support.helpers as helpers
[docs]LOGGER = logging.getLogger(__name__)
# The following dictionary is based on the list of energy metabolites chosen
# as part of the following publication:
# Fritzemeier, C. J., Hartleb, D., Szappanos, B., Papp, B., & Lercher,
# M. J. (2017). Erroneous energy-generating cycles in published genome scale
# metabolic networks: Identification and removal. PLoS Computational
# Biology, 13(4), 1–14. http://doi.org/10.1371/journal.pcbi.1005494
[docs]ENERGY_COUPLES = {
"MNXM3": "MNXM7",
"MNXM63": "MNXM220",
"MNXM51": "MNXM30",
"MNXM121": "MNXM17",
"MNXM423": "MNXM495",
"MNXM6": "MNXM5",
"MNXM10": "MNXM8",
"MNXM38": "MNXM33",
"MNXM208": "MNXM119",
"MNXM191": "MNXM232",
"MNXM223": "MNXM509",
"MNXM7517": "MNXM12235",
"MNXM12233": "MNXM12236",
"MNXM558": "MNXM2178",
"MNXM21": "MNXM12",
"MNXM89557": "MNXM20",
}
[docs]TOLERANCE_THRESHOLD = 1e-07
[docs]cobra_configuration = Configuration()
[docs]def check_stoichiometric_consistency(model):
"""
Verify the consistency of the model's stoichiometry.
Parameters
----------
model : cobra.Model
The metabolic model under investigation.
Notes
-----
See [1]_ section 3.1 for a complete description of the algorithm.
.. [1] Gevorgyan, A., M. G Poolman, and D. A Fell.
"Detection of Stoichiometric Inconsistencies in Biomolecular
Models."
Bioinformatics 24, no. 19 (2008): 2245.
"""
problem = model.problem
# The transpose of the stoichiometric matrix N.T in the paper.
stoich_trans = problem.Model()
# We clone the existing configuration in order to apply non-default
# settings, for example, for solver verbosity or timeout.
stoich_trans.configuration = problem.Configuration.clone(
config=model.solver.configuration, problem=stoich_trans
)
internal_rxns = con_helpers.get_internals(model)
metabolites = set(met for rxn in internal_rxns for met in rxn.metabolites)
LOGGER.info("model '%s' has %d internal reactions", model.id, len(internal_rxns))
LOGGER.info("model '%s' has %d internal metabolites", model.id, len(metabolites))
stoich_trans.add([problem.Variable(m.id, lb=1) for m in metabolites])
stoich_trans.update()
con_helpers.add_reaction_constraints(
stoich_trans, internal_rxns, problem.Constraint
)
# The objective is to minimize the metabolite mass vector.
stoich_trans.objective = problem.Objective(Zero, direction="min", sloppy=True)
stoich_trans.objective.set_linear_coefficients(
{var: 1.0 for var in stoich_trans.variables}
)
status = stoich_trans.optimize()
if status == OPTIMAL:
return True
elif status == INFEASIBLE:
return False
else:
raise RuntimeError(
"Could not determine stoichiometric consistencty."
" Solver status is '{}'"
" (only optimal or infeasible expected).".format(status)
)
)
[docs]def find_inconsistent_min_stoichiometry(model, atol=1e-13, max_mets_computed=10):
"""
Detect inconsistent minimal net stoichiometries.
Parameters
----------
model : cobra.Model
The metabolic model under investigation.
atol : float, optional
Values below the absolute tolerance are treated as zero. Expected to be
very small but larger than zero.
max_mets_computed: int, optional
To avoid computing for too long, a soft cap is added to the number of
computed unconserved metabolites (trivial cases are ignored).
Notes
-----
See [1]_ section 3.3 for a complete description of the algorithm.
References
----------
.. [1] Gevorgyan, A., M. G Poolman, and D. A Fell.
"Detection of Stoichiometric Inconsistencies in Biomolecular
Models."
Bioinformatics 24, no. 19 (2008): 2245.
"""
if check_stoichiometric_consistency(model):
return set()
Model, Constraint, Variable, Objective = con_helpers.get_interface(model)
unconserved_mets = find_unconserved_metabolites(model)
LOGGER.info("model has %d unconserved metabolites", len(unconserved_mets))
internal_rxns = con_helpers.get_internals(model)
internal_mets = set(met for rxn in internal_rxns for met in rxn.metabolites)
get_id = attrgetter("id")
reactions = sorted(internal_rxns, key=get_id)
metabolites = sorted(internal_mets, key=get_id)
stoich, met_index, rxn_index = con_helpers.stoichiometry_matrix(
metabolites, reactions
)
left_ns = con_helpers.nullspace(stoich.T, atol)
if left_ns.size == 0:
LOGGER.info("Left nullspace is empty!")
return {(met,) for met in unconserved_mets}
(problem, indicators) = con_helpers.create_milp_problem(
left_ns, metabolites, Model, Variable, Constraint, Objective
)
# We clone the existing configuration in order to apply non-default
# settings, for example, for solver verbosity or timeout.
problem.configuration = model.problem.Configuration.clone(
config=model.solver.configuration, problem=problem
)
LOGGER.info("Left nullspace has a dimension of %d.", left_ns.shape[1])
LOGGER.debug("%s", str(problem))
inc_minimal = set()
cuts = list()
n_computed = 0
for met in unconserved_mets:
# always add the met as an uncoserved set if there is no left nullspace
row = met_index[met]
if (left_ns[row] == 0.0).all():
LOGGER.debug("%s: singleton minimal unconservable set.", met.id)
# singleton set!
inc_minimal.add((met,))
continue
if n_computed >= max_mets_computed:
LOGGER.debug("max number of computed unconserved metabolites reached")
break
# expect a positive mass for the unconserved metabolite
problem.variables[met.id].lb = 1e-3
status = problem.optimize()
while status == "optimal":
LOGGER.debug("%s: status %s", met.id, status)
LOGGER.debug(
"sum of all primal values: %f", sum(problem.primal_values.values())
)
LOGGER.debug(
"sum of binary indicators: %f", sum(var.primal for var in indicators)
)
solution = [
model.metabolites.get_by_id(var.name[2:])
for var in indicators
if var.primal > 0.2
]
LOGGER.debug("%s: set size %d", met.id, len(solution))
n_computed += 1
inc_minimal.add(tuple(solution))
if len(solution) == 1:
break
cuts.append(
con_helpers.add_cut(problem, indicators, len(solution) - 1, Constraint)
)
status = problem.optimize()
LOGGER.debug("%s: last status %s", met.id, status)
# reset
problem.variables[met.id].lb = 0.0
problem.remove(cuts)
cuts.clear()
return inc_minimal
[docs]def find_elementary_leakage_modes(model, atol=1e-13):
"""
Detect elementary leakage modes.
Parameters
----------
model : cobra.Model
The metabolic model under investigation.
atol : float, optional
Values below the absolute tolerance are treated as zero. Expected to be
very small but larger than zero.
Notes
-----
See [1]_ section 3.4 for a complete description of the algorithm.
References
----------
.. [1] Gevorgyan, A., M. G Poolman, and D. A Fell.
"Detection of Stoichiometric Inconsistencies in Biomolecular
Models."
Bioinformatics 24, no. 19 (2008): 2245.
"""
raise NotImplementedError("Coming soon™ if considered useful.")
[docs]def detect_energy_generating_cycles(model, metabolite_id):
"""
Detect erroneous energy-generating cycles for a a single metabolite.
The function will first build a dissipation reaction corresponding to the
input metabolite. This reaction is then set as the objective for
optimization, after closing all exchanges. If the reaction was able to
carry flux, an erroneous energy-generating cycle must be present. In this
case a list of reactions with a flux greater than zero is returned.
Otherwise, the function returns False.
Parameters
----------
model : cobra.Model
The metabolic model under investigation.
metabolite_id : str
The identifier of an energy metabolite.
Notes
-----
"[...] energy generating cycles (EGC) [...] charge energy metabolites
without a source of energy. [...] To efficiently identify the existence of
diverse EGCs, we first add a dissipation reaction to the metabolic network
for each metabolite used to transmit cellular energy; e.g., for ATP, the
irreversible reaction ATP + H2O → ADP + P + H+ is added. These dissipation
reactions close any existing energy-generating cycles, thereby converting
them to type-III pathways. Fluxes through any of the dissipation reactions
at steady state indicate the generation of energy through the metabolic
network. Second, all uptake reactions are constrained to zero. The sum of
the fluxes through the energy dissipation reactions is now maximized using
FBA. For a model without EGCs, these reactions cannot carry any flux
without the uptake of nutrients. [1]_."
References
----------
.. [1] Fritzemeier, C. J., Hartleb, D., Szappanos, B., Papp, B., & Lercher,
M. J. (2017). Erroneous energy-generating cycles in published genome scale
metabolic networks: Identification and removal. PLoS Computational
Biology, 13(4), 1–14. http://doi.org/10.1371/journal.pcbi.1005494
"""
main_comp = helpers.find_compartment_id_in_model(model, "c")
met = helpers.find_met_in_model(model, metabolite_id, main_comp)[0]
dissipation_rxn = Reaction("Dissipation")
if metabolite_id in ["MNXM3", "MNXM63", "MNXM51", "MNXM121", "MNXM423"]:
# build nucleotide-type dissipation reaction
dissipation_rxn.add_metabolites(
{
helpers.find_met_in_model(model, "MNXM2", main_comp)[0]: -1,
helpers.find_met_in_model(model, "MNXM1", main_comp)[0]: 1,
helpers.find_met_in_model(model, "MNXM9", main_comp)[0]: 1,
}
)
elif metabolite_id in ["MNXM6", "MNXM10"]:
# build nicotinamide-type dissipation reaction
dissipation_rxn.add_metabolites(
{helpers.find_met_in_model(model, "MNXM1", main_comp)[0]: 1}
)
elif metabolite_id in [
"MNXM38",
"MNXM208",
"MNXM191",
"MNXM223",
"MNXM7517",
"MNXM12233",
"MNXM558",
]:
# build redox-partner-type dissipation reaction
dissipation_rxn.add_metabolites(
{helpers.find_met_in_model(model, "MNXM1", main_comp)[0]: 2}
)
elif metabolite_id == "MNXM21":
dissipation_rxn.add_metabolites(
{
helpers.find_met_in_model(model, "MNXM2", main_comp)[0]: -1,
helpers.find_met_in_model(model, "MNXM1", main_comp)[0]: 1,
helpers.find_met_in_model(model, "MNXM26", main_comp)[0]: 1,
}
)
elif metabolite_id == "MNXM89557":
dissipation_rxn.add_metabolites(
{
helpers.find_met_in_model(model, "MNXM2", main_comp)[0]: -1,
helpers.find_met_in_model(model, "MNXM1", main_comp)[0]: 2,
helpers.find_met_in_model(model, "MNXM15", main_comp)[0]: 1,
}
)
dissipation_product = helpers.find_met_in_model(
model, ENERGY_COUPLES[metabolite_id], main_comp
)[0]
dissipation_rxn.add_metabolites({met: -1, dissipation_product: 1})
helpers.close_boundaries_sensibly(model)
model.add_reactions([dissipation_rxn])
model.objective = dissipation_rxn
solution = model.optimize(raise_error=True)
if solution.objective_value > 0.0:
return (
solution.fluxes[solution.fluxes.abs() > 0.0]
.index.drop(["Dissipation"])
.tolist()
)
else:
return []
[docs]def find_mass_unbalanced_reactions(reactions):
"""
Find metabolic reactions that are not mass balanced.
Parameters
----------
reactions : iterable
An iterable of cobra.Reaction's.
"""
return [rxn for rxn in reactions if not con_helpers.is_mass_balanced(rxn)]
[docs]def find_charge_unbalanced_reactions(reactions):
"""
Find metabolic reactions that are not charge balanced.
Parameters
----------
reactions : iterable
An iterable of cobra.Reaction's.
"""
return [rxn for rxn in reactions if not con_helpers.is_charge_balanced(rxn)]
[docs]def find_stoichiometrically_balanced_cycles(model):
"""
Find metabolic reactions in stoichiometrically balanced cycles (SBCs).
Identify forward and reverse cycles by closing all exchanges and using FVA.
Parameters
----------
model : cobra.Model
The metabolic model under investigation.
Notes
-----
"SBCs are artifacts of metabolic reconstructions due to insufficient
constraints (e.g., thermodynamic constraints and regulatory
constraints) [1]_." They are defined by internal reactions that carry
flux in spite of closed exchange reactions.
References
----------
.. [1] Thiele, I., & Palsson, B. Ø. (2010, January). A protocol for
generating a high-quality genome-scale metabolic reconstruction.
Nature protocols. Nature Publishing Group.
http://doi.org/10.1038/nprot.2009.203
"""
helpers.close_boundaries_sensibly(model)
fva_result = flux_variability_analysis(model, loopless=False)
return fva_result.index[
(fva_result["minimum"] <= (-1 + TOLERANCE_THRESHOLD))
| (fva_result["maximum"] >= (1 - TOLERANCE_THRESHOLD))
].tolist()
[docs]def find_orphans(model):
"""
Return metabolites that are only consumed in reactions.
Metabolites that are involved in an exchange reaction are never
considered to be orphaned.
Parameters
----------
model : cobra.Model
The metabolic model under investigation.
"""
exchanges = frozenset(model.exchanges)
return [
met
for met in model.metabolites
if (len(met.reactions) > 0)
and all(
(rxn not in exchanges) and con_helpers.is_only_substrate(met, rxn)
for rxn in met.reactions
)
]
[docs]def find_deadends(model):
"""
Return metabolites that are only produced in reactions.
Metabolites that are involved in an exchange reaction are never
considered to be dead ends.
Parameters
----------
model : cobra.Model
The metabolic model under investigation.
"""
exchanges = frozenset(model.exchanges)
return [
met
for met in model.metabolites
if (len(met.reactions) > 0)
and all(
(rxn not in exchanges) and con_helpers.is_only_product(met, rxn)
for rxn in met.reactions
)
]
[docs]def find_disconnected(model):
"""
Return metabolites that are not in any of the reactions.
Parameters
----------
model : cobra.Model
The metabolic model under investigation.
"""
return [met for met in model.metabolites if len(met.reactions) == 0]
[docs]def _init_worker(model, variable_name, coefficient):
"""
Initialize a global model object for multiprocessing.
Parameters
----------
model : cobra.Model
The metabolic model under investigation.
variable_name: str
The name of the variable representing the metabolite exchange.
coefficient: int
The value of the metabolite's stoichiometric coefficient: -1 to test
if the model can produce the metabolite and 1 to test if it can be
consumed.
"""
global _model
global _sink
global _coefficient
_model = model
_sink = model.variables[variable_name]
_model.objective = _sink
_coefficient = coefficient
[docs]def find_reactions_with_unbounded_flux_default_condition(model):
"""
Return list of reactions whose flux is unbounded in the default condition.
Parameters
----------
model : cobra.Model
The metabolic model under investigation.
Returns
-------
tuple
list
A list of reactions that in default modeling conditions are able to
carry flux as high/low as the systems maximal and minimal bounds.
float
The fraction of the amount of unbounded reactions to the amount of
non-blocked reactions.
list
A list of reactions that in default modeling conditions are not able
to carry flux at all.
"""
try:
fva_result = flux_variability_analysis(model, fraction_of_optimum=1.0)
except Infeasible as err:
LOGGER.error(
"Failed to find reactions with unbounded flux "
"because '{}'. This may be a bug.".format(err)
)
raise Infeasible(
"It was not possible to run flux variability "
"analysis on the model. Make sure that the model "
"can be solved! Check if the constraints are not "
"too strict."
)
# Per reaction (row) the flux is below threshold (close to zero).
conditionally_blocked = fva_result.loc[
fva_result.abs().max(axis=1) < TOLERANCE_THRESHOLD
].index.tolist()
small, large = helpers.find_bounds(model)
# Find those reactions whose flux is close to or outside of the median
# upper or lower bound, i.e., appears unconstrained.
unlimited_flux = fva_result.loc[
np.isclose(fva_result["maximum"], large, atol=TOLERANCE_THRESHOLD)
| (fva_result["maximum"] > large)
| np.isclose(fva_result["minimum"], small, atol=TOLERANCE_THRESHOLD)
| (fva_result["minimum"] < small)
].index.tolist()
try:
fraction = len(unlimited_flux) / (
len(model.reactions) - len(conditionally_blocked)
)
except ZeroDivisionError:
LOGGER.error(
"Division by Zero! Failed to calculate the "
"fraction of unbounded reactions. Does this model "
"have any reactions at all?"
)
raise ZeroDivisionError(
"It was not possible to calculate the "
"fraction of unbounded reactions to "
"un-blocked reactions. This may be because"
"the model doesn't have any reactions at "
"all or that none of the reactions can "
"carry a flux larger than zero!"
)
return unlimited_flux, fraction, conditionally_blocked