# -*- coding: utf-8 -*-
# Copyright 2017 Novo Nordisk Foundation Center for Biosustainability,
# Technical University of Denmark.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Supporting functions for stoichiometric consistency checks."""
from __future__ import absolute_import
import logging
from operator import attrgetter
import numpy as np
from cobra import Reaction
from cobra.exceptions import Infeasible
from cobra.flux_analysis import flux_variability_analysis
import memote.support.consistency_helpers as con_helpers
LOGGER = logging.getLogger(__name__)
# The following dictionary is based on the list of energy metabolites chosen
# as part of the following publication:
# Fritzemeier, C. J., Hartleb, D., Szappanos, B., Papp, B., & Lercher,
# M. J. (2017). Erroneous energy-generating cycles in published genome scale
# metabolic networks: Identification and removal. PLoS Computational
# Biology, 13(4), 1–14. http://doi.org/10.1371/journal.pcbi.1005494
ENERGY_COUPLES = {
'atp_c': 'adp_c',
'ctp_c': 'cdp_c',
'gtp_c': 'gdp_c',
'utp_c': 'udp_c',
'itp_c': 'idp_c',
'nadph_c': 'nadp_c',
'nadh_c': 'nad_c',
'fadh2_c': 'fad_c',
'fmnh2_c': 'fmn_c',
'q8h2_c': 'q8_c',
'mql8_c': 'mqn8_c',
'mql6_c': 'mqn6_c',
'mql7_c': 'mqn7_c',
'2dmmql8_c': '2dmmq8_c',
'accoa_c': 'coa_c',
'glu__L_c': 'akg_c',
'h_p': 'h_c'
}
[docs]def check_stoichiometric_consistency(model):
"""
Verify the consistency of the model stoichiometry.
Parameters
----------
model : cobra.Model
The metabolic model under investigation.
Notes
-----
See [1]_ section 3.1 for a complete description of the algorithm.
.. [1] Gevorgyan, A., M. G Poolman, and D. A Fell.
"Detection of Stoichiometric Inconsistencies in Biomolecular Models."
Bioinformatics 24, no. 19 (2008): 2245.
"""
Model, Constraint, Variable, Objective = con_helpers.get_interface(model)
# The transpose of the stoichiometric matrix N.T in the paper.
stoich_trans = Model()
internal_rxns = con_helpers.get_internals(model)
metabolites = set(met for rxn in internal_rxns for met in rxn.metabolites)
LOGGER.info("model '%s' has %d internal reactions", model.id,
len(internal_rxns))
LOGGER.info("model '%s' has %d internal metabolites", model.id,
len(metabolites))
for metabolite in metabolites:
stoich_trans.add(Variable(metabolite.id, lb=1))
stoich_trans.update()
con_helpers.add_reaction_constraints(
stoich_trans, internal_rxns, Constraint)
# The objective is to minimize the metabolite mass vector.
stoich_trans.objective = Objective(1)
stoich_trans.objective.set_linear_coefficients(
{var: 1. for var in stoich_trans.variables})
stoich_trans.objective.direction = "min"
status = stoich_trans.optimize()
if status == "optimal":
return True
elif status == "infeasible":
return False
else:
raise RuntimeError(
"Could not determine stoichiometric consistencty."
" Solver status is '{}'"
" (only optimal or infeasible expected).".format(status))
[docs]def find_inconsistent_min_stoichiometry(model, atol=1e-13):
"""
Detect inconsistent minimal net stoichiometries.
Parameters
----------
model : cobra.Model
The metabolic model under investigation.
atol : float, optional
Values below the absolute tolerance are treated as zero. Expected to be
very small but larger than zero.
Notes
-----
See [1]_ section 3.3 for a complete description of the algorithm.
.. [1] Gevorgyan, A., M. G Poolman, and D. A Fell.
"Detection of Stoichiometric Inconsistencies in Biomolecular Models."
Bioinformatics 24, no. 19 (2008): 2245.
"""
if check_stoichiometric_consistency(model):
return set()
Model, Constraint, Variable, Objective = con_helpers.get_interface(model)
unconserved_mets = find_unconserved_metabolites(model)
LOGGER.info("model has %d unconserved metabolites", len(unconserved_mets))
internal_rxns = con_helpers.get_internals(model)
internal_mets = set(met for rxn in internal_rxns for met in rxn.metabolites)
get_id = attrgetter("id")
reactions = sorted(internal_rxns, key=get_id)
metabolites = sorted(internal_mets, key=get_id)
stoich, met_index, rxn_index = con_helpers.stoichiometry_matrix(
metabolites, reactions)
left_ns = con_helpers.nullspace(stoich.T)
# deal with numerical instabilities
left_ns[np.abs(left_ns) < atol] = 0.0
LOGGER.info("nullspace has dimension %d", left_ns.shape[1])
inc_minimal = set()
(problem, indicators) = con_helpers.create_milp_problem(
left_ns, metabolites, Model, Variable, Constraint, Objective)
LOGGER.debug(str(problem))
cuts = list()
for met in unconserved_mets:
row = met_index[met]
if (left_ns[row] == 0.0).all():
LOGGER.debug("%s: singleton minimal unconservable set", met.id)
# singleton set!
inc_minimal.add((met,))
continue
# expect a positive mass for the unconserved metabolite
problem.variables[met.id].lb = 1e-3
status = problem.optimize()
while status == "optimal":
LOGGER.debug("%s: status %s", met.id, status)
LOGGER.debug("sum of all primal values: %f",
sum(problem.primal_values.values()))
LOGGER.debug("sum of binary indicators: %f",
sum(var.primal for var in indicators))
solution = [model.metabolites.get_by_id(var.name[2:])
for var in indicators if var.primal > 0.2]
LOGGER.debug("%s: set size %d", met.id, len(solution))
inc_minimal.add(tuple(solution))
if len(solution) == 1:
break
cuts.append(con_helpers.add_cut(
problem, indicators, len(solution) - 1, Constraint))
status = problem.optimize()
LOGGER.debug("%s: last status %s", met.id, status)
# reset
problem.variables[met.id].lb = 0.0
problem.remove(cuts)
cuts.clear()
return inc_minimal
[docs]def find_elementary_leakage_modes(model, atol=1e-13):
"""
Detect elementary leakage modes.
Parameters
----------
model : cobra.Model
The metabolic model under investigation.
atol : float, optional
Values below the absolute tolerance are treated as zero. Expected to be
very small but larger than zero.
Notes
-----
See [1]_ section 3.4 for a complete description of the algorithm.
References
----------
.. [1] Gevorgyan, A., M. G Poolman, and D. A Fell.
"Detection of Stoichiometric Inconsistencies in Biomolecular
Models."
Bioinformatics 24, no. 19 (2008): 2245.
"""
raise NotImplementedError(
"Coming soon™ if considered useful.")
[docs]def detect_energy_generating_cycles(model, metabolite_id):
"""
Detect erroneous energy-generating cycles for a a single metabolite.
The function will first build a dissipation reaction corresponding to the
input metabolite. This reaction is then set as the objective for
optimization, after closing all exchanges. If the reaction was able to
carry flux, an erroneous energy-generating cycle must be present. In this
case a list of reactions with a flux greater than zero is returned.
Otherwise, the function returns False.
Parameters
----------
model : cobra.Model
The metabolic model under investigation.
metabolite_id : str
The identifier of an energy metabolite.
Notes
-----
"[...] energy generating cycles (EGC) [...] charge energy metabolites
without a source of energy. [...] To efficiently identify the existence of
diverse EGCs, we first add a dissipation reaction to the metabolic network
for each metabolite used to transmit cellular energy; e.g., for ATP, the
irreversible reaction ATP + H2O → ADP + P + H+ is added. These dissipation
reactions close any existing energy-generating cycles, thereby converting
them to type-III pathways. Fluxes through any of the dissipation reactions
at steady state indicate the generation of energy through the metabolic
network. Second, all uptake reactions are constrained to zero. The sum of
the fluxes through the energy dissipation reactions is now maximized using
FBA. For a model without EGCs, these reactions cannot carry any flux
without the uptake of nutrients. [1]_."
References
----------
.. [1] Fritzemeier, C. J., Hartleb, D., Szappanos, B., Papp, B., & Lercher,
M. J. (2017). Erroneous energy-generating cycles in published genome scale
metabolic networks: Identification and removal. PLoS Computational
Biology, 13(4), 1–14. http://doi.org/10.1371/journal.pcbi.1005494
"""
met = model.metabolites.get_by_id(metabolite_id)
dissipation_product = model.metabolites.get_by_id(ENERGY_COUPLES[met.id])
dissipation_rxn = Reaction('Dissipation')
with model:
try:
maintenance = model.reactions.get_by_id('ATPM')
maintenance.bounds = 0, 1000
except KeyError:
pass
for exchange in model.exchanges:
exchange.bounds = (0, 0)
model.add_reactions([dissipation_rxn])
if met.id in ['atp_c', 'ctp_c', 'gtp_c', 'utp_c', 'itp_c']:
# build nucleotide-type dissipation reaction
dissipation_rxn.reaction = "h2o_c --> h_c + pi_c"
elif met.id in ['nadph_c', 'nadh_c']:
# build nicotinamide-type dissipation reaction
dissipation_rxn.reaction = "--> h_c"
elif met.id in ['fadh2_c', 'fmnh2_c', 'q8h2_c', 'mql8_c',
'mql6_c', 'mql7_c', 'dmmql8_c']:
# build redox-partner-type dissipation reaction
dissipation_rxn.reaction = "--> 2 h_c"
elif met.id == 'accoa_c':
dissipation_rxn.reaction = "h2o_c --> h_c + ac_c"
elif met.id == 'glu__L_c':
dissipation_rxn.reaction = "h2o_c --> 2 h_c + nh3_c"
elif met.id == 'h_p':
pass
dissipation_rxn.add_metabolites(
{met.id: -1, dissipation_product: 1})
model.objective = dissipation_rxn
solution = model.optimize()
if solution.status == 'infeasible':
return 'infeasible'
elif solution.objective_value > 0.0:
return solution.fluxes[solution.fluxes.abs() > 0.0].index. \
drop(["Dissipation"]).tolist()
else:
return []
[docs]def find_mass_imbalanced_reactions(model):
"""
Find metabolic reactions that are not mass balanced.
This will exclude biomass, exchange and demand reactions as they are
unbalanced by definition. It will also fail all reactions where at
least one metabolite does not have a formula defined.
Parameters
----------
model : cobra.Model
The metabolic model under investigation.
"""
internal_rxns = con_helpers.get_internals(model)
return [
rxn for rxn in internal_rxns if not con_helpers.is_mass_balanced(rxn)]
[docs]def find_charge_imbalanced_reactions(model):
"""
Find metabolic reactions that are not charge balanced.
This will exclude biomass, exchange and demand reactions as they are
unbalanced by definition. It will also fail all reactions where at
least one metabolite does not have a charge defined.
Parameters
----------
model : cobra.Model
The metabolic model under investigation.
"""
internal_rxns = con_helpers.get_internals(model)
return [
rxn for rxn in internal_rxns if not con_helpers.is_charge_balanced(rxn)]
[docs]def find_blocked_reactions(model):
"""
Find metabolic reactions that are blocked.
Parameters
----------
model : cobra.Model
The metabolic model under investigation.
Notes
-----
Blocked reactions are those reactions that when optimized for cannot carry
any flux while all exchanges are open.
"""
with model:
for rxn in model.exchanges:
rxn.bounds = (-1000, 1000)
fva_result = flux_variability_analysis(model)
blocked = fva_result.loc[(fva_result["maximum"] == 0.0) &
(fva_result["minimum"] == 0.0)]
return [model.reactions.get_by_id(name) for name in blocked.index]
[docs]def find_stoichiometrically_balanced_cycles(model):
"""
Find metabolic rxns in stoichiometrically balanced cycles (SBCs).
The flux distribution of nominal FVA is compared with loopless FVA
(loopless=True) to determine reactions that participate in loops, as
participation in loops would increase the flux through a given reactions
to the maximal bounds. This function then returns reactions where the
flux differs between the two FVA calculations.
Parameters
----------
model : cobra.Model
The metabolic model under investigation.
Notes
-----
"SBCs are artifacts of metabolic reconstructions due to insufficient
constraints (e.g., thermodynamic constraints and regulatory
constraints) [1]_." They are defined by internal reactions that carry
flux in spite of closed exchange reactions.
References
----------
.. [1] Thiele, I., & Palsson, B. Ø. (2010, January). A protocol for
generating a high-quality genome-scale metabolic reconstruction.
Nature protocols. Nature Publishing Group.
http://doi.org/10.1038/nprot.2009.203
"""
try:
fva_result = flux_variability_analysis(model, loopless=False)
fva_result_loopless = flux_variability_analysis(model, loopless=True)
except Infeasible as err:
LOGGER.error("Failed to find stoichiometrically balanced cycles "
"because '{}'. This may be a bug.".format(err))
return []
row_ids_max = fva_result[
fva_result.maximum != fva_result_loopless.maximum].index
row_ids_min = fva_result[
fva_result.minimum != fva_result_loopless.minimum].index
differential_fluxes = set(row_ids_min).union(set(row_ids_max))
return [model.reactions.get_by_id(id) for id in differential_fluxes]
[docs]def find_orphans(model):
"""
Return metabolites that are only consumed in reactions.
Parameters
----------
model : cobra.Model
The metabolic model under investigation.
"""
return [met for met in model.metabolites
if (len(met.reactions) > 0) and all(
(not rxn.reversibility) and (rxn.metabolites[met] < 0)
for rxn in met.reactions)]
[docs]def find_deadends(model):
"""
Return metabolites that are only produced in reactions.
Parameters
----------
model : cobra.Model
The metabolic model under investigation.
"""
return [met for met in model.metabolites
if (len(met.reactions) > 0) and all(
(not rxn.reversibility) and
(rxn.metabolites[met] > 0) for rxn in met.reactions)]
[docs]def find_disconnected(model):
"""
Return metabolites that are not in any of the reactions.
Parameters
----------
model : cobra.Model
The metabolic model under investigation.
"""
return [met for met in model.metabolites if len(met.reactions) == 0]