Source code for memote.support.helpers

# -*- coding: utf-8 -*-

# Copyright 2016 Novo Nordisk Foundation Center for Biosustainability,
# Technical University of Denmark.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Helper functions that are used all over the memote package."""

from __future__ import absolute_import

import logging
import re
from builtins import dict
from collections import defaultdict

from six import iteritems, itervalues
from sympy import expand

LOGGER = logging.getLogger(__name__)


def find_transported_elements(rxn):
    """
    Return a dictionary showing the amount of transported elements of a rxn.

    Collects the elements for each metabolite participating in a reaction,
    multiplies the amount by the metabolite's stoichiometry in the reaction
    and bins the result according to the compartment that metabolite is in.
    This produces a dictionary of dictionaries such as this
    ``{'p': {'C': -1, 'H': -4}, 'c': {'C': 1, 'H': 4}}`` which shows the
    transported entities. The per-compartment dictionaries are then merged
    into a single one (for an element that occurs in several compartments
    the compartment visited last wins), zero balances are dropped and the
    absolute value of every remaining amount is returned.

    Parameters
    ----------
    rxn : cobra.Reaction
        Any cobra.Reaction containing metabolites. ``rxn.metabolites`` must
        map each metabolite to its stoichiometric coefficient and every
        metabolite must expose ``compartment`` and ``elements``.

    Returns
    -------
    dict
        Maps each element to the absolute value of its non-zero net amount.
        Empty if every element is balanced within each compartment.

    """
    # Net elemental balance per compartment. The mapping is read once
    # instead of being looked up again for every single metabolite.
    element_dist = {}
    for met, coefficient in rxn.metabolites.items():
        balance = element_dist.setdefault(met.compartment, {})
        # Multiplication by the metabolite stoichiometry.
        for element, amount in met.elements.items():
            balance[element] = balance.get(element, 0) + amount * coefficient
    # Simplification of the resulting dictionary of dictionaries.
    delta_dict = {}
    for elements in element_dist.values():
        delta_dict.update(elements)
    # Only non-zero values get included in the returned delta-dict.
    return {k: abs(v) for (k, v) in delta_dict.items() if v != 0}
def find_transport_reactions(model):
    """
    Return a list of all transport reactions.

    Parameters
    ----------
    model : cobra.Model
        The metabolic model under investigation.

    Returns
    -------
    list
        The reactions classified as transporters, in the order in which
        they occur in ``model.reactions``.

    Notes
    -----
    A reaction is reported as a transport reaction if

    1. ``find_transported_elements`` reports a non-zero net amount of at
       least one element for it, and
    2. it is neither one of ``model.exchanges`` nor one of the reactions
       returned by ``find_biomass_reaction``.

    Reactions whose only net-transported element is ``H`` are skipped unless
    the set of formulae that occur unchanged on both sides of the equation
    is exactly ``{'H'}``.

    This function will not identify transport via the PTS System.

    """
    # Neither lookup depends on the reaction at hand, so both are evaluated
    # once up front instead of once per reaction.
    excluded = set(model.exchanges)
    excluded.update(find_biomass_reaction(model))

    transport_reactions = []
    for rxn in model.reactions:
        # Looking for formulas that stay the same on both sides of the
        # reaction.
        reactant_formulae = set(met.formula for met in rxn.reactants)
        product_formulae = set(met.formula for met in rxn.products)
        transported_mets = reactant_formulae & product_formulae
        # Collect information on the elemental differences between
        # compartments in the reaction.
        delta_dicts = find_transported_elements(rxn)
        # Weeding out reactions such as oxidoreductases where no net
        # transport of Hydrogen is occurring, but rather just an exchange of
        # electrons or charges effecting a change in protonation.
        if transported_mets != {'H'} and list(delta_dicts) == ['H']:
            continue
        # All other reactions for which the amount of transported elements
        # is not zero, which are not part of the model's exchange nor
        # biomass reactions, are defined as transport reactions.
        # This includes reactions where the transported metabolite reacts
        # with a carrier molecule.
        if sum(delta_dicts.values()) and rxn not in excluded:
            transport_reactions.append(rxn)
    return transport_reactions
def find_converting_reactions(model, pair):
    """
    Find reactions which convert a given metabolite pair.

    Parameters
    ----------
    model : cobra.Model
        The metabolic model under investigation.
    pair: tuple or list
        A pair of metabolite identifiers without compartment suffix. They
        are matched as prefixes, i.e., every metabolite whose ID starts with
        the given string is considered.

    Returns
    -------
    frozenset
        The set of reactions that have one of the pair on their left-hand
        side and the other on the right-hand side.

    """
    # Select the metabolite objects directly rather than collecting their
    # IDs first and looking every ID up again.
    first = set(
        met for met in model.metabolites if met.id.startswith(pair[0]))
    second = set(
        met for met in model.metabolites if met.id.startswith(pair[1]))

    hits = []
    for rxn in model.reactions:
        # Read each side once per reaction.
        reactants = rxn.reactants
        products = rxn.products
        forward = not (
            first.isdisjoint(reactants) or second.isdisjoint(products))
        backward = not (
            first.isdisjoint(products) or second.isdisjoint(reactants))
        if forward or backward:
            hits.append(rxn)
    return frozenset(hits)
def find_biomass_reaction(model):
    """
    Return a list of the biomass reaction(s) of the model.

    A reaction counts as a biomass reaction when its identifier contains
    the string "biomass", compared case-insensitively.

    Parameters
    ----------
    model : cobra.Model
        The metabolic model under investigation.

    Returns
    -------
    list
        The matching reactions in the order of ``model.reactions``.

    """
    biomass_rxns = []
    for reaction in model.reactions:
        lowered_id = reaction.id.lower()
        if "biomass" in lowered_id:
            biomass_rxns.append(reaction)
    return biomass_rxns
def df2dict(df):
    """
    Turn a `pandas.DataFrame` into a `dict` of lists.

    Every column becomes one entry holding the column's values as a list.
    The row labels are stored afterwards under the key ``"index"``, which
    therefore replaces a column of that name.
    """
    blob = dict()
    for column in df.columns:
        blob[column] = df[column].tolist()
    blob["index"] = df.index.tolist()
    return blob
def find_demand_reactions(model):
    u"""
    Return a list of demand reactions.

    The candidates are the reactions in ``model.exchanges``. Those that are
    irreversible and do not involve the compartment ``'e'`` are returned.

    Parameters
    ----------
    model : cobra.Model
        A cobrapy metabolic model

    Returns
    -------
    list
        The demand reactions of the model.

    Notes
    -----
    [1] defines demand reactions as:
    -- 'unbalanced network reactions that allow the accumulation of a
       compound'
    -- reactions that are chiefly added during the gap-filling process
    -- as a means of dealing with 'compounds that are known to be produced
       by the organism [..] (i) for which no information is available about
       their fractional distribution to the biomass or (ii) which may only
       be produced in some environmental conditions
    -- reactions with a formula such as: 'met_c -> '

    Demand reactions differ from exchange reactions in that the metabolites
    are not removed from the extracellular environment, but from any of the
    organism's compartments.

    References
    ----------
    .. [1] Thiele, I., & Palsson, B. Ø. (2010, January). A protocol for
           generating a high-quality genome-scale metabolic reconstruction.
           Nature protocols. Nature Publishing Group.
           http://doi.org/10.1038/nprot.2009.203

    """
    demands = []
    for rxn in set(model.exchanges):
        # Reversible boundary reactions are not demands.
        if rxn.reversibility:
            continue
        # Anything touching the extracellular compartment is not a demand.
        if 'e' in rxn.get_compartments():
            continue
        demands.append(rxn)
    return demands
def find_sink_reactions(model):
    u"""
    Return a list of sink reactions.

    The candidates are the reactions in ``model.exchanges``. Those that are
    reversible and do not involve the compartment ``'e'`` are returned.

    Parameters
    ----------
    model : cobra.Model
        A cobrapy metabolic model

    Returns
    -------
    list
        The sink reactions of the model.

    Notes
    -----
    [1] defines sink reactions as:
    -- 'similar to demand reactions' but reversible, thus able to supply the
       model with metabolites
    -- reactions that are chiefly added during the gap-filling process
    -- as a means of dealing with 'compounds that are produced by
       nonmetabolic cellular processes but that need to be metabolized'
    -- reactions with a formula such as: 'met_c <-> '

    Sink reactions differ from exchange reactions in that the metabolites
    are not removed from the extracellular environment, but from any of the
    organism's compartments.

    References
    ----------
    .. [1] Thiele, I., & Palsson, B. Ø. (2010, January). A protocol for
           generating a high-quality genome-scale metabolic reconstruction.
           Nature protocols. Nature Publishing Group.
           http://doi.org/10.1038/nprot.2009.203

    """
    boundary_rxns = set(model.exchanges)
    sinks = []
    for rxn in boundary_rxns:
        # Only reversible reactions outside of the extracellular
        # compartment qualify.
        if rxn.reversibility and 'e' not in rxn.get_compartments():
            sinks.append(rxn)
    return sinks
def find_exchange_rxns(model):
    u"""
    Return a list of exchange reactions.

    The candidates are the reactions in ``model.exchanges``. Those that
    involve the compartment ``'e'`` are returned, whatever their
    reversibility.

    Parameters
    ----------
    model : cobra.Model
        A cobrapy metabolic model

    Returns
    -------
    list
        The exchange reactions of the model.

    Notes
    -----
    [1] defines exchange reactions as:
    -- reactions that 'define the extracellular environment'
    -- 'unbalanced, extra-organism reactions that represent the supply to or
       removal of metabolites from the extra-organism "space"'
    -- reactions with a formula such as: 'met_e -> ' or ' -> met_e' or
       'met_e <=> '

    Exchange reactions differ from demand reactions in that the metabolites
    are removed from or added to the extracellular environment only. With
    this the uptake or secretion of a metabolite is modeled, respectively.

    References
    ----------
    .. [1] Thiele, I., & Palsson, B. Ø. (2010, January). A protocol for
           generating a high-quality genome-scale metabolic reconstruction.
           Nature protocols. Nature Publishing Group.
           http://doi.org/10.1038/nprot.2009.203

    """
    exchange_rxns = []
    for candidate in set(model.exchanges):
        if 'e' in candidate.get_compartments():
            exchange_rxns.append(candidate)
    return exchange_rxns
def find_functional_units(gpr_str):
    """
    Return an iterator of gene IDs grouped by boolean rules from the gpr_str.

    The gpr_str is first transformed into an algebraic expression, replacing
    the boolean operators 'or' with '+' and 'and' with '*'. Treating the
    gene IDs as sympy.symbols this allows a mathematical expansion of the
    algebraic expression. The expanded form is then split again producing
    sets of gene IDs that in the gpr_str had an 'and' relationship.

    Parameters
    ----------
    gpr_str : string
        A string consisting of gene ids and the boolean expressions 'and'
        and 'or'. The operators are recognised in any capitalisation but
        only as whole words.

    Yields
    ------
    list
        The gene IDs of one functional unit, i.e., one summand of the
        expanded expression split at every '*'.

    """
    # NOTE(review): gene IDs that sympy does not parse as plain symbols
    # (e.g. purely numeric IDs) presumably do not survive the expansion
    # unchanged; confirm against the models in use.
    #
    # Match the operators as whole words only. Otherwise an 'or' or 'and'
    # inside a gene identifier (e.g. 'orf19') is replaced as well, which
    # corrupts the identifier.
    with_products = re.sub(r'\band\b', '*', gpr_str, flags=re.IGNORECASE)
    algebraic_form = re.sub(
        r'\bor\b', '+', with_products, flags=re.IGNORECASE)
    expanded = str(expand(algebraic_form))
    # Every summand of the expanded form is one group of genes that are
    # joined by 'and'.
    for unit in expanded.split(' + '):
        yield unit.split('*')