Source code for memote.support.biomass

# -*- coding: utf-8 -*-

# Copyright 2017 Novo Nordisk Foundation Center for Biosustainability,
# Technical University of Denmark.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Supporting functions for biomass consistency checks."""

from __future__ import absolute_import

import logging
import re

import numpy as np
from cobra.exceptions import OptimizationError
from future.utils import raise_with_traceback
from six import iteritems

import memote.support.helpers as helpers


__all__ = (
    "sum_biomass_weight",
    "find_biomass_precursors",
    "find_blocked_biomass_precursors",
)

LOGGER = logging.getLogger(__name__)

# 20 Amino Acids, 4 Deoxyribonucleotides, 4 Ribonucleotides,
# 8 Universal Cofactors, and H2O
ESSENTIAL_PRECURSOR_IDS = [
    "MNXM94",
    "MNXM55",
    "MNXM134",
    "MNXM76",
    "MNXM61",
    "MNXM97",
    "MNXM53",
    "MNXM114",
    "MNXM42",
    "MNXM142",
    "MNXM37",
    "MNXM89557",
    "MNXM231",
    "MNXM70",
    "MNXM78",
    "MNXM199",
    "MNXM140",
    "MNXM32",
    "MNXM29",
    "MNXM147",
    # Deoxyribonucleotides
    "MNXM286",
    "MNXM360",
    "MNXM394",
    "MNXM344",
    # Ribonucleotides
    "MNXM3",
    "MNXM51",
    "MNXM63",
    "MNXM121",
    # NAD
    "MNXM8",
    # NADP
    "MNXM5",
    # S-adenosyl-L-methionine
    "MNXM16",
    # FAD
    "MNXM33",
    # Pyridoxal 5'-phosphate
    "MNXM161",
    # CoA
    "MNXM12",
    # Thiamine Diphosphate
    "MNXM256",
    # FMN
    "MNXM119",
    # H2O
    "MNXM2",
]


[docs]def sum_biomass_weight(reaction): """ Compute the sum of all reaction compounds. This function expects all metabolites of the biomass reaction to have formula information assigned. Parameters ---------- reaction : cobra.core.reaction.Reaction The biomass reaction of the model under investigation. Returns ------- float The molecular weight of the biomass reaction in units of g/mmol. """ return ( sum( -coef * met.formula_weight for (met, coef) in iteritems(reaction.metabolites) ) / 1000.0
)
[docs]def find_biomass_precursors(model, reaction): """ Return a list of all biomass precursors excluding ATP and H2O. Parameters ---------- reaction : cobra.core.reaction.Reaction The biomass reaction of the model under investigation. model : cobra.Model The metabolic model under investigation. Returns ------- list Metabolite objects that are reactants of the biomass reaction excluding ATP and H2O. """ id_of_main_compartment = helpers.find_compartment_id_in_model(model, "c") gam_reactants = set() try: gam_reactants.update( [helpers.find_met_in_model(model, "MNXM3", id_of_main_compartment)[0]] ) except RuntimeError: pass try: gam_reactants.update( [helpers.find_met_in_model(model, "MNXM2", id_of_main_compartment)[0]] ) except RuntimeError: pass biomass_precursors = set(reaction.reactants) - gam_reactants return list(biomass_precursors)
[docs]def find_blocked_biomass_precursors(reaction, model): """ Return a list of all biomass precursors that cannot be produced. Parameters ---------- reaction : cobra.core.reaction.Reaction The biomass reaction of the model under investigation. model : cobra.Model The metabolic model under investigation. Returns ------- list Metabolite objects that are reactants of the biomass reaction excluding ATP and H2O that cannot be produced by flux balance analysis. """ LOGGER.debug("Finding blocked biomass precursors") precursors = find_biomass_precursors(model, reaction) blocked_precursors = list() _, ub = helpers.find_bounds(model) for precursor in precursors: with model: dm_rxn = model.add_boundary( precursor, type="safe-demand", reaction_id="safe_demand", lb=0, ub=ub ) if helpers.get_biomass_flux(model, dm_rxn.id) <= 0.0: blocked_precursors.append(precursor) return blocked_precursors
def gam_in_biomass(model, reaction): """ Return boolean if biomass reaction includes growth-associated maintenance. Parameters ---------- model : cobra.Model The metabolic model under investigation. reaction : cobra.core.reaction.Reaction The biomass reaction of the model under investigation. Returns ------- boolean True if the biomass reaction includes ATP and H2O as reactants and ADP, Pi and H as products, False otherwise. """ id_of_main_compartment = helpers.find_compartment_id_in_model(model, "c") try: left = { helpers.find_met_in_model(model, "MNXM3", id_of_main_compartment)[0], helpers.find_met_in_model(model, "MNXM2", id_of_main_compartment)[0], } right = { helpers.find_met_in_model(model, "MNXM7", id_of_main_compartment)[0], helpers.find_met_in_model(model, "MNXM1", id_of_main_compartment)[0], helpers.find_met_in_model(model, "MNXM9", id_of_main_compartment)[0], } except RuntimeError: return False return left.issubset(set(reaction.reactants)) and right.issubset( set(reaction.products) ) def find_direct_metabolites(model, reaction, tolerance=1e-06): """ Return list of possible direct biomass precursor metabolites. The term direct metabolites describes metabolites that are involved only in either transport and/or boundary reactions, AND the biomass reaction(s), but not in any purely metabolic reactions. Parameters ---------- model : cobra.Model The metabolic model under investigation. reaction : cobra.Reaction The biomass reaction of the model under investigation. tolerance : float, optional Tolerance below which values will be regarded as zero. Returns ------- list Metabolites that qualify as direct metabolites i.e. biomass precursors that are taken up to be consumed by the biomass reaction only. """ biomass_rxns = set(helpers.find_biomass_reaction(model)) tra_bou_bio_rxns = helpers.find_interchange_biomass_reactions(model, biomass_rxns) try: precursors = find_biomass_precursors(model, reaction) main_comp = helpers.find_compartment_id_in_model(model, "c") ext_space = helpers.find_compartment_id_in_model(model, "e") except KeyError: LOGGER.error( "Failed to properly identify cytosolic and extracellular " "compartments." ) raise_with_traceback( KeyError( "The cytosolic and/or extracellular " "compartments could not be identified." ) ) except RuntimeError: LOGGER.error( "Failed to properly identify cytosolic and extracellular " "compartments." ) raise_with_traceback( RuntimeError( "The cytosolic and/or extracellular " "compartments could not be " "identified." ) ) else: tra_bou_bio_mets = [ met for met in precursors if met.reactions.issubset(tra_bou_bio_rxns) ] rxns_of_interest = set( [ rxn for met in tra_bou_bio_mets for rxn in met.reactions if rxn not in biomass_rxns ] ) solution = model.optimize(raise_error=True) if np.isclose(solution.objective_value, 0, atol=tolerance): LOGGER.error( "Failed to generate a non-zero objective value with " "flux balance analysis." ) raise OptimizationError( "The flux balance analysis on this model returned an " "objective value of zero. Make sure the model can " "grow! Check if the constraints are not too strict!" ) tra_bou_bio_fluxes = {r: solution[r.id] for r in rxns_of_interest} met_flux_sum = {m: 0 for m in tra_bou_bio_mets} return detect_false_positive_direct_metabolites( tra_bou_bio_mets, biomass_rxns, main_comp, ext_space, tra_bou_bio_fluxes, met_flux_sum, ) def detect_false_positive_direct_metabolites( candidates, biomass_reactions, cytosol, extra, reaction_fluxes, metabolite_fluxes ): """ Weed out false positive direct metabolites. False positives exists in the extracellular compartment with flux from the cytosolic compartment and are part of the biomass reaction(s). It sums fluxes positively or negatively depending on if direct metabolites in the extracellular compartment are defined as reactants or products in various reactions. Parameters ---------- candidates : list of cobra.Metabolite Candidate direct metabolites. biomass_reactions : set of cobra.Reaction The biomass reactions. Usually one or two. cytosol : str The identifier of the cytosolic compartment. extra : str The identifier of the extracellular compartment. Returns ------- list Definitive list of direct metabolites, i.e., biomass precursors that are taken up to be consumed by the biomass reaction only. """ for met in candidates: is_internal = met.compartment != extra for rxn in met.reactions: if rxn in biomass_reactions: continue # Internal metabolites can not be false positives. if is_internal: metabolite_fluxes[met] += abs(reaction_fluxes[rxn]) continue # if the metabolite is in the "e" compartment and a reactant, # sum the fluxes accordingly (outward=negative, inward=positive) if met in rxn.reactants: product_comps = set([p.compartment for p in rxn.products]) # if the reaction has no product (outward flux) if len(product_comps) == 0: metabolite_fluxes[met] += -reaction_fluxes[rxn] # if the reaction has a product in "c" (inward flux) elif cytosol in product_comps: metabolite_fluxes[met] += reaction_fluxes[rxn] # if the metabolite is in the "e" compartment and a product, # sum the fluxes accordingly (outward=negative, inward=positive) elif met in rxn.products: reactant_comps = set([p.compartment for p in rxn.reactants]) # if the reaction has no reactant (inward flux) if len(reactant_comps) == 0: metabolite_fluxes[met] += reaction_fluxes[rxn] # if the reaction has a reactant in "c" (outward flux) elif cytosol in reactant_comps: metabolite_fluxes[met] += -reaction_fluxes[rxn] return [m for m, f in iteritems(metabolite_fluxes) if f > 0] def bundle_biomass_components(model, reaction): """ Return bundle biomass component reactions if it is not one lumped reaction. There are two basic ways of specifying the biomass composition. The most common is a single lumped reaction containing all biomass precursors. Alternatively, the biomass equation can be split into several reactions each focusing on a different macromolecular component for instance a (1 gDW ash) + b (1 gDW phospholipids) + c (free fatty acids)+ d (1 gDW carbs) + e (1 gDW protein) + f (1 gDW RNA) + g (1 gDW DNA) + h (vitamins/cofactors) + xATP + xH2O-> 1 gDCW biomass + xADP + xH + xPi. This function aims to identify if the given biomass reaction 'reaction', is a lumped all-in-one reaction, or whether it is just the final composing reaction of all macromolecular components. It is important to identify which other reaction belong to a given biomass reaction to be able to identify universal biomass components or calculate detailed precursor stoichiometries. Parameters ---------- model : cobra.Model The metabolic model under investigation. reaction : cobra.core.reaction.Reaction The biomass reaction of the model under investigation. Returns ------- list One or more reactions that qualify as THE biomass equation together. Notes ----- Counting H2O, ADP, Pi, H, and ATP, the amount of metabolites in a split reaction is comparatively low: Any reaction with less or equal to 15 metabolites can probably be counted as a split reaction containing Ash, Phospholipids, Fatty Acids, Carbohydrates (i.e. cell wall components), Protein, RNA, DNA, Cofactors and Vitamins, and Small Molecules. Any reaction with more than or equal to 28 metabolites, however, (21 AA + 3 Nucleotides (4-ATP) + 4 Deoxy-Nucleotides) can be considered a lumped reaction. Anything in between will be treated conservatively as a lumped reaction. For split reactions, after removing any of the metabolites associated with growth-associated energy expenditure (H2O, ADP, Pi, H, and ATP), the only remaining metabolites should be generalized macromolecule precursors e.g. Protein, Phospholipids etc. Each of these have their own composing reactions. Hence we include the reactions of these metabolites in the set that ultimately makes up the returned list of reactions that together make up the biomass equation. """ if len(reaction.metabolites) >= 16: return [reaction] id_of_main_compartment = helpers.find_compartment_id_in_model(model, "c") gam_mets = ["MNXM3", "MNXM2", "MNXM7", "MNXM1", "MNXM9"] try: gam = set( [ helpers.find_met_in_model(model, met, id_of_main_compartment)[0] for met in gam_mets ] ) except RuntimeError: gam = set() regex = re.compile("^{}(_[a-zA-Z]+?)*?$".format("biomass"), re.IGNORECASE) biomass_metabolite = set(model.metabolites.query(regex)) macromolecules = set(reaction.metabolites) - gam - biomass_metabolite bundled_reactions = set() for met in macromolecules: bundled_reactions = bundled_reactions | set(met.reactions) return list(bundled_reactions) def essential_precursors_not_in_biomass(model, reaction): """ Return a list of essential precursors missing from the biomass reaction. There are universal components of life that make up the biomass of all known organisms. These include all proteinogenic amino acids, deoxy- and ribonucleotides, water and a range of metabolic cofactors. Parameters ---------- model : cobra.Model The metabolic model under investigation. reaction : cobra.core.reaction.Reaction The biomass reaction of the model under investigation. Returns ------- list IDs of essential metabolites missing from the biomass reaction. The IDS will appear in the models namespace if the metabolite exists, but will be using the MetaNetX namespace if the metabolite does not exist in the model. Notes ----- "Answering the question of what to include in the core of a biomass objective function is not always straightforward. One example is different nucleotide forms, which, although inter-convertible, are essential for cellular chemistry. We propose here that all essential and irreplaceable molecules for metabolism should be included in the biomass functions of genome scale metabolic models. In the special case of cofactors, when two forms of the same cofactor take part in the same reactions (such as NAD and NADH), only one form could be included for the sake of simplicity. When a class of cofactors includes active and non-active interconvertible forms, the active forms should be preferred. [1]_." Please note, that [1]_ also suggest to count C1 carriers (derivatives of tetrahydrofolate(B9) or tetrahydromethanopterin) as universal cofactors. We have omitted these from this check because there are many individual compounds that classify as C1 carriers, and it is not clear a priori which one should be preferred. In a future update, we may consider identifying these using a chemical ontology. References ---------- .. [1] Xavier, J. C., Patil, K. R., & Rocha, I. (2017). Integration of Biomass Formulations of Genome-Scale Metabolic Models with Experimental Data Reveals Universally Essential Cofactors in Prokaryotes. Metabolic Engineering, 39(October 2016), 200–208. http://doi.org/10.1016/j.ymben.2016.12.002 """ main_comp = helpers.find_compartment_id_in_model(model, "c") biomass_eq = bundle_biomass_components(model, reaction) pooled_precursors = set([met for rxn in biomass_eq for met in rxn.metabolites]) missing_essential_precursors = [] for mnx_id in ESSENTIAL_PRECURSOR_IDS: try: met = helpers.find_met_in_model(model, mnx_id, main_comp)[0] if met not in pooled_precursors: missing_essential_precursors.append(met.id) except RuntimeError: missing_essential_precursors.append(mnx_id) return missing_essential_precursors