# -*- coding: utf-8 -*-
# Copyright 2016 Novo Nordisk Foundation Center for Biosustainability,
# Technical University of Denmark.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Helper functions that are used all over the memote package."""
from __future__ import absolute_import
import logging
import re
from builtins import dict
from collections import defaultdict
import numpy as np
import warnings
with warnings.catch_warnings():
warnings.simplefilter("ignore", UserWarning)
# ignore Gurobi warning
from cobra.exceptions import Infeasible
from six import iteritems, itervalues
from sympy import expand
LOGGER = logging.getLogger(__name__)
[docs]def find_transported_elements(rxn):
"""
Return a dictionary showing the amount of transported elements of a rxn.
Collects the elements for each metabolite participating in a reaction,
multiplies the amount by the metabolite's stoichiometry in the reaction and
bins the result according to the compartment that metabolite is in. This
produces a dictionary of dictionaries such as this
``{'p': {'C': -1, 'H': -4}, c: {'C': 1, 'H': 4}}`` which shows the
transported entities. This dictionary is then simplified to only include
the non-zero elements of one single compartment i.e. showing the precise
elements that are transported.
Parameters
----------
rxn : cobra.Reaction
Any cobra.Reaction containing metabolites.
"""
element_dist = defaultdict()
# Collecting elements for each metabolite.
for met in rxn.metabolites:
if met.compartment not in element_dist:
# Multiplication by the metabolite stoichiometry.
element_dist[met.compartment] = \
{k: v * rxn.metabolites[met]
for (k, v) in iteritems(met.elements)}
else:
x = {k: v * rxn.metabolites[met] for (k, v) in
iteritems(met.elements)}
y = element_dist[met.compartment]
element_dist[met.compartment] = \
{k: x.get(k, 0) + y.get(k, 0) for k in set(x) | set(y)}
delta_dict = defaultdict()
# Simplification of the resulting dictionary of dictionaries.
for elements in itervalues(element_dist):
delta_dict.update(elements)
# Only non-zero values get included in the returned delta-dict.
delta_dict = {k: abs(v) for (k, v) in iteritems(delta_dict) if v != 0}
return delta_dict
[docs]def find_transport_reactions(model):
"""
Return a list of all transport reactions.
Parameters
----------
model : cobra.Model
The metabolic model under investigation.
Notes
-----
A transport reaction is defined as follows:
1. It contains metabolites from at least 2 compartments and
2. at least 1 metabolite undergoes no chemical reaction, i.e.,
the formula stays the same on both sides of the equation.
This function will not identify transport via the PTS System.
"""
transport_reactions = []
for rxn in model.reactions:
# Collecting criteria to classify transporters by.
rxn_reactants = set([met.formula for met in rxn.reactants])
rxn_products = set([met.formula for met in rxn.products])
# Looking for formulas that stay the same on both side of the reaction.
transported_mets = \
[formula for formula in rxn_reactants if formula in rxn_products]
# Collect information on the elemental differences between
# compartments in the reaction.
delta_dicts = find_transported_elements(rxn)
non_zero_array = [v for (k, v) in iteritems(delta_dicts) if v != 0]
# Weeding out reactions such as oxidoreductases where no net
# transport of Hydrogen is occurring, but rather just an exchange of
# electrons or charges effecting a change in protonation.
if set(transported_mets) != set('H') and list(
delta_dicts.keys()
) == ['H']:
pass
# All other reactions for which the amount of transported elements is
# not zero, which are not part of the model's exchange nor
# biomass reactions, are defined as transport reactions.
# This includes reactions where the transported metabolite reacts with
# a carrier molecule.
elif sum(non_zero_array) and rxn not in model.exchanges and \
rxn not in find_biomass_reaction(model):
transport_reactions.append(rxn)
return transport_reactions
[docs]def find_converting_reactions(model, pair):
"""
Find reactions which convert a given metabolite pair.
Parameters
----------
model : cobra.Model
The metabolic model under investigation.
pair: tuple or list
A pair of metabolite identifiers without compartment suffix.
Returns
-------
frozenset
The set of reactions that have one of the pair on their left-hand
side and the other on the right-hand side.
"""
met_ids = [m.id for m in model.metabolites]
first = set(model.metabolites.get_by_id(m)
for m in met_ids if m.startswith(pair[0]))
second = set(model.metabolites.get_by_id(m)
for m in met_ids if m.startswith(pair[1]))
hits = list()
for rxn in model.reactions:
if len(first & set(rxn.reactants)) > 0 and len(
second & set(rxn.products)) > 0:
hits.append(rxn)
elif len(first & set(rxn.products)) > 0 and len(
second & set(rxn.reactants)) > 0:
hits.append(rxn)
return frozenset(hits)
# TODO: Improve the heuristics of identifying the biomass reaction(s)!!!
[docs]def find_biomass_reaction(model):
"""
Return a list of the biomass reaction(s) of the model.
Parameters
----------
model : cobra.Model
The metabolic model under investigation.
"""
return [rxn for rxn in model.reactions if "biomass" in rxn.id.lower()]
[docs]def df2dict(df):
"""Turn a `pandas.DataFrame` into a `dict` of lists."""
blob = dict((key, df[key].tolist()) for key in df.columns)
blob["index"] = df.index.tolist()
return blob
[docs]def find_demand_reactions(model):
u"""
Return a list of demand reactions.
Parameters
----------
model : cobra.Model
A cobrapy metabolic model
Notes
-----
[1] defines demand reactions as:
-- 'unbalanced network reactions that allow the accumulation of a compound'
-- reactions that are chiefly added during the gap-filling process
-- as a means of dealing with 'compounds that are known to be produced by
the organism [..] (i) for which no information is available about their
fractional distribution to the biomass or (ii) which may only be produced
in some environmental conditions
-- reactions with a formula such as: 'met_c -> '
Demand reactions differ from exchange reactions in that the metabolites
are not removed from the extracellular environment, but from any of the
organism's compartments.
References
----------
.. [1] Thiele, I., & Palsson, B. Ø. (2010, January). A protocol for
generating a high-quality genome-scale metabolic reconstruction.
Nature protocols. Nature Publishing Group.
http://doi.org/10.1038/nprot.2009.203
"""
demand_and_exchange_rxns = set(model.exchanges)
return [rxn for rxn in demand_and_exchange_rxns
if not rxn.reversibility and not
any(c in rxn.get_compartments() for c in ['e'])]
[docs]def find_sink_reactions(model):
u"""
Return a list of sink reactions.
Parameters
----------
model : cobra.Model
A cobrapy metabolic model
Notes
-----
[1] defines sink reactions as:
-- 'similar to demand reactions' but reversible, thus able to supply the
model with metabolites
-- reactions that are chiefly added during the gap-filling process
-- as a means of dealing with 'compounds that are produced by nonmetabolic
cellular processes but that need to be metabolized'
-- reactions with a formula such as: 'met_c <-> '
Sink reactions differ from exchange reactions in that the metabolites
are not removed from the extracellular environment, but from any of the
organism's compartments.
References
----------
.. [1] Thiele, I., & Palsson, B. Ø. (2010, January). A protocol for
generating a high-quality genome-scale metabolic reconstruction.
Nature protocols. Nature Publishing Group.
http://doi.org/10.1038/nprot.2009.203
"""
demand_and_exchange_rxns = set(model.exchanges)
return [rxn for rxn in demand_and_exchange_rxns
if rxn.reversibility and not
any(c in rxn.get_compartments() for c in ['e'])]
[docs]def find_exchange_rxns(model):
u"""
Return a list of exchange reactions.
Parameters
----------
model : cobra.Model
A cobrapy metabolic model
Notes
-----
[1] defines exchange reactions as:
-- reactions that 'define the extracellular environment'
-- 'unbalanced, extra-organism reactions that represent the supply to or
removal of metabolites from the extra-organism "space"'
-- reactions with a formula such as: 'met_e -> ' or ' -> met_e' or
'met_e <=> '
Exchange reactions differ from demand reactions in that the metabolites
are removed from or added to the extracellular environment only. With this
the uptake or secretion of a metabolite is modeled, respectively.
References
----------
.. [1] Thiele, I., & Palsson, B. Ø. (2010, January). A protocol for
generating a high-quality genome-scale metabolic reconstruction.
Nature protocols. Nature Publishing Group.
http://doi.org/10.1038/nprot.2009.203
"""
demand_and_exchange_rxns = set(model.exchanges)
return [rxn for rxn in demand_and_exchange_rxns
if any(c in rxn.get_compartments() for c in ['e'])]
[docs]def find_functional_units(gpr_str):
"""
Return an iterator of gene IDs grouped by boolean rules from the gpr_str.
The gpr_str is first transformed into an algebraic expression, replacing
the boolean operators 'or' with '+' and 'and' with '*'. Treating the
gene IDs as sympy.symbols this allows a mathematical expansion of the
algebraic expression. The expanded form is then split again producing sets
of gene IDs that in the gpr_str had an 'and' relationship.
Parameters
----------
gpr_str : string
A string consisting of gene ids and the boolean expressions 'and'
and 'or'
"""
algebraic_form = re.sub('[Oo]r', '+', re.sub('[Aa]nd', '*', gpr_str))
expanded = str(expand(algebraic_form))
for unit in expanded.replace('+', ',').split(' , '):
yield unit.split('*')
[docs]def run_fba(model, rxn_id, direction="max", single_value=True):
"""
Return the solution of an FBA to a set objective function.
Parameters
----------
model : cobra.Model
A cobrapy metabolic model
rxn_id : string
A string containing the reaction ID of the desired FBA objective
direction: string
A string containing either "max" or "min" to specify the direction
of the desired FBA objective function
single_value: boolean
Indicates whether the results for all reactions are gathered from the
solver, or only the result for the objective value.
Returns
-------
cobra.solution
The cobra solution object for the corresponding FBA problem
"""
model.objective = model.reactions.get_by_id(rxn_id)
model.objective_direction = direction
if single_value:
try:
return model.slim_optimize()
except Infeasible:
return np.nan
else:
try:
solution = model.optimize()
return solution
except Infeasible:
return np.nan
[docs]def close_boundaries_sensibly(model):
"""
Return a cobra model with all boundaries closed and changed constraints.
In the returned model previously fixed reactions are no longer constrained
as such. Instead reactions are constrained according to their
reversibility. This is to prevent the FBA from becoming infeasible when
trying to solve a model with closed exchanges and one fixed reaction.
Parameters
----------
model : cobra.Model
A cobrapy metabolic model
Returns
-------
cobra.Model
A cobra model with all boundary reactions closed and the constraints
of each reaction set according to their reversibility.
"""
with model:
for rxn in model.reactions:
if rxn.reversibility:
rxn.bounds = -1, 1
else:
rxn.bounds = 0, 1
for exchange in model.exchanges:
exchange.bounds = (0, 0)
return model.copy()