Source code for memote.support.helpers

# -*- coding: utf-8 -*-

# Copyright 2016 Novo Nordisk Foundation Center for Biosustainability,
# Technical University of Denmark.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Helper functions that are used all over the memote package."""

from __future__ import absolute_import

import logging
import re
from builtins import dict
from collections import defaultdict
import numpy as np
import warnings
with warnings.catch_warnings():
    warnings.simplefilter("ignore", UserWarning)
    # ignore Gurobi warning
    from cobra.exceptions import Infeasible

from six import iteritems, itervalues
from sympy import expand

LOGGER = logging.getLogger(__name__)


[docs]def find_transported_elements(rxn): """ Return a dictionary showing the amount of transported elements of a rxn. Collects the elements for each metabolite participating in a reaction, multiplies the amount by the metabolite's stoichiometry in the reaction and bins the result according to the compartment that metabolite is in. This produces a dictionary of dictionaries such as this ``{'p': {'C': -1, 'H': -4}, c: {'C': 1, 'H': 4}}`` which shows the transported entities. This dictionary is then simplified to only include the non-zero elements of one single compartment i.e. showing the precise elements that are transported. Parameters ---------- rxn : cobra.Reaction Any cobra.Reaction containing metabolites. """ element_dist = defaultdict() # Collecting elements for each metabolite. for met in rxn.metabolites: if met.compartment not in element_dist: # Multiplication by the metabolite stoichiometry. element_dist[met.compartment] = \ {k: v * rxn.metabolites[met] for (k, v) in iteritems(met.elements)} else: x = {k: v * rxn.metabolites[met] for (k, v) in iteritems(met.elements)} y = element_dist[met.compartment] element_dist[met.compartment] = \ {k: x.get(k, 0) + y.get(k, 0) for k in set(x) | set(y)} delta_dict = defaultdict() # Simplification of the resulting dictionary of dictionaries. for elements in itervalues(element_dist): delta_dict.update(elements) # Only non-zero values get included in the returned delta-dict. delta_dict = {k: abs(v) for (k, v) in iteritems(delta_dict) if v != 0} return delta_dict
[docs]def find_transport_reactions(model): """ Return a list of all transport reactions. Parameters ---------- model : cobra.Model The metabolic model under investigation. Notes ----- A transport reaction is defined as follows: 1. It contains metabolites from at least 2 compartments and 2. at least 1 metabolite undergoes no chemical reaction, i.e., the formula stays the same on both sides of the equation. This function will not identify transport via the PTS System. """ transport_reactions = [] for rxn in model.reactions: # Collecting criteria to classify transporters by. rxn_reactants = set([met.formula for met in rxn.reactants]) rxn_products = set([met.formula for met in rxn.products]) # Looking for formulas that stay the same on both side of the reaction. transported_mets = \ [formula for formula in rxn_reactants if formula in rxn_products] # Collect information on the elemental differences between # compartments in the reaction. delta_dicts = find_transported_elements(rxn) non_zero_array = [v for (k, v) in iteritems(delta_dicts) if v != 0] # Weeding out reactions such as oxidoreductases where no net # transport of Hydrogen is occurring, but rather just an exchange of # electrons or charges effecting a change in protonation. if set(transported_mets) != set('H') and list( delta_dicts.keys() ) == ['H']: pass # All other reactions for which the amount of transported elements is # not zero, which are not part of the model's exchange nor # biomass reactions, are defined as transport reactions. # This includes reactions where the transported metabolite reacts with # a carrier molecule. elif sum(non_zero_array) and rxn not in model.exchanges and \ rxn not in find_biomass_reaction(model): transport_reactions.append(rxn) return transport_reactions
[docs]def find_converting_reactions(model, pair): """ Find reactions which convert a given metabolite pair. Parameters ---------- model : cobra.Model The metabolic model under investigation. pair: tuple or list A pair of metabolite identifiers without compartment suffix. Returns ------- frozenset The set of reactions that have one of the pair on their left-hand side and the other on the right-hand side. """ met_ids = [m.id for m in model.metabolites] first = set(model.metabolites.get_by_id(m) for m in met_ids if m.startswith(pair[0])) second = set(model.metabolites.get_by_id(m) for m in met_ids if m.startswith(pair[1])) hits = list() for rxn in model.reactions: if len(first & set(rxn.reactants)) > 0 and len( second & set(rxn.products)) > 0: hits.append(rxn) elif len(first & set(rxn.products)) > 0 and len( second & set(rxn.reactants)) > 0: hits.append(rxn) return frozenset(hits)
# TODO: Improve the heuristics of identifying the biomass reaction(s)!!!
[docs]def find_biomass_reaction(model): """ Return a list of the biomass reaction(s) of the model. Parameters ---------- model : cobra.Model The metabolic model under investigation. """ return [rxn for rxn in model.reactions if "biomass" in rxn.id.lower()]
[docs]def df2dict(df): """Turn a `pandas.DataFrame` into a `dict` of lists.""" blob = dict((key, df[key].tolist()) for key in df.columns) blob["index"] = df.index.tolist() return blob
[docs]def find_demand_reactions(model): u""" Return a list of demand reactions. Parameters ---------- model : cobra.Model A cobrapy metabolic model Notes ----- [1] defines demand reactions as: -- 'unbalanced network reactions that allow the accumulation of a compound' -- reactions that are chiefly added during the gap-filling process -- as a means of dealing with 'compounds that are known to be produced by the organism [..] (i) for which no information is available about their fractional distribution to the biomass or (ii) which may only be produced in some environmental conditions -- reactions with a formula such as: 'met_c -> ' Demand reactions differ from exchange reactions in that the metabolites are not removed from the extracellular environment, but from any of the organism's compartments. References ---------- .. [1] Thiele, I., & Palsson, B. Ø. (2010, January). A protocol for generating a high-quality genome-scale metabolic reconstruction. Nature protocols. Nature Publishing Group. http://doi.org/10.1038/nprot.2009.203 """ demand_and_exchange_rxns = set(model.exchanges) return [rxn for rxn in demand_and_exchange_rxns if not rxn.reversibility and not any(c in rxn.get_compartments() for c in ['e'])]
[docs]def find_sink_reactions(model): u""" Return a list of sink reactions. Parameters ---------- model : cobra.Model A cobrapy metabolic model Notes ----- [1] defines sink reactions as: -- 'similar to demand reactions' but reversible, thus able to supply the model with metabolites -- reactions that are chiefly added during the gap-filling process -- as a means of dealing with 'compounds that are produced by nonmetabolic cellular processes but that need to be metabolized' -- reactions with a formula such as: 'met_c <-> ' Sink reactions differ from exchange reactions in that the metabolites are not removed from the extracellular environment, but from any of the organism's compartments. References ---------- .. [1] Thiele, I., & Palsson, B. Ø. (2010, January). A protocol for generating a high-quality genome-scale metabolic reconstruction. Nature protocols. Nature Publishing Group. http://doi.org/10.1038/nprot.2009.203 """ demand_and_exchange_rxns = set(model.exchanges) return [rxn for rxn in demand_and_exchange_rxns if rxn.reversibility and not any(c in rxn.get_compartments() for c in ['e'])]
[docs]def find_exchange_rxns(model): u""" Return a list of exchange reactions. Parameters ---------- model : cobra.Model A cobrapy metabolic model Notes ----- [1] defines exchange reactions as: -- reactions that 'define the extracellular environment' -- 'unbalanced, extra-organism reactions that represent the supply to or removal of metabolites from the extra-organism "space"' -- reactions with a formula such as: 'met_e -> ' or ' -> met_e' or 'met_e <=> ' Exchange reactions differ from demand reactions in that the metabolites are removed from or added to the extracellular environment only. With this the uptake or secretion of a metabolite is modeled, respectively. References ---------- .. [1] Thiele, I., & Palsson, B. Ø. (2010, January). A protocol for generating a high-quality genome-scale metabolic reconstruction. Nature protocols. Nature Publishing Group. http://doi.org/10.1038/nprot.2009.203 """ demand_and_exchange_rxns = set(model.exchanges) return [rxn for rxn in demand_and_exchange_rxns if any(c in rxn.get_compartments() for c in ['e'])]
[docs]def find_functional_units(gpr_str): """ Return an iterator of gene IDs grouped by boolean rules from the gpr_str. The gpr_str is first transformed into an algebraic expression, replacing the boolean operators 'or' with '+' and 'and' with '*'. Treating the gene IDs as sympy.symbols this allows a mathematical expansion of the algebraic expression. The expanded form is then split again producing sets of gene IDs that in the gpr_str had an 'and' relationship. Parameters ---------- gpr_str : string A string consisting of gene ids and the boolean expressions 'and' and 'or' """ algebraic_form = re.sub('[Oo]r', '+', re.sub('[Aa]nd', '*', gpr_str)) expanded = str(expand(algebraic_form)) for unit in expanded.replace('+', ',').split(' , '): yield unit.split('*')
[docs]def run_fba(model, rxn_id, direction="max", single_value=True): """ Return the solution of an FBA to a set objective function. Parameters ---------- model : cobra.Model A cobrapy metabolic model rxn_id : string A string containing the reaction ID of the desired FBA objective direction: string A string containing either "max" or "min" to specify the direction of the desired FBA objective function single_value: boolean Indicates whether the results for all reactions are gathered from the solver, or only the result for the objective value. Returns ------- cobra.solution The cobra solution object for the corresponding FBA problem """ model.objective = model.reactions.get_by_id(rxn_id) model.objective_direction = direction if single_value: try: return model.slim_optimize() except Infeasible: return np.nan else: try: solution = model.optimize() return solution except Infeasible: return np.nan
[docs]def close_boundaries_sensibly(model): """ Return a cobra model with all boundaries closed and changed constraints. In the returned model previously fixed reactions are no longer constrained as such. Instead reactions are constrained according to their reversibility. This is to prevent the FBA from becoming infeasible when trying to solve a model with closed exchanges and one fixed reaction. Parameters ---------- model : cobra.Model A cobrapy metabolic model Returns ------- cobra.Model A cobra model with all boundary reactions closed and the constraints of each reaction set according to their reversibility. """ with model: for rxn in model.reactions: if rxn.reversibility: rxn.bounds = -1, 1 else: rxn.bounds = 0, 1 for exchange in model.exchanges: exchange.bounds = (0, 0) return model.copy()