Source code for test_biomass

# -*- coding: utf-8 -*-

# Copyright 2017 Novo Nordisk Foundation Center for Biosustainability,
# Technical University of Denmark.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Biomass tests performed on an instance of ``cobra.Model``.

N.B.: We parametrize each function here with the identified biomass reactions.
In the storage of test results we rely on the order of the biomass fixtures
to remain the same as the parametrized test cases.
"""

from __future__ import absolute_import, division

import logging

import pytest
from cobra.exceptions import OptimizationError

import memote.support.biomass as biomass
import memote.support.helpers as helpers
from memote.utils import annotate, get_ids, truncate, wrapper


[docs]LOGGER = logging.getLogger(__name__)
@annotate(title="Biomass Reactions Identified", format_type="count")
[docs]def test_biomass_presence(model): """ Expect the model to contain at least one biomass reaction. The biomass composition aka biomass formulation aka biomass reaction is a common pseudo-reaction accounting for biomass synthesis in constraints-based modelling. It describes the stoichiometry of intracellular compounds that are required for cell growth. While this reaction may not be relevant to modeling the metabolism of higher organisms, it is essential for single-cell modeling. Implementation: Identifies possible biomass reactions using two principal steps: 1. Return reactions that include the SBO annotation "SBO:0000629" for biomass. If no reactions can be identifies this way: 1. Look for the ``buzzwords`` "biomass", "growth" and "bof" in reaction IDs. 2. Look for metabolite IDs or names that contain the ``buzzword`` "biomass" and obtain the set of reactions they are involved in. 3. Remove boundary reactions from this set. 4. Return the union of reactions that match the buzzwords and of the reactions that metabolites are involved in that match the buzzword. This test checks if at least one biomass reaction is present. """ ann = test_biomass_presence.annotation ann["data"] = [rxn.id for rxn in helpers.find_biomass_reaction(model)] outcome = len(ann["data"]) > 0 ann["metric"] = 1.0 - float(outcome) ann["message"] = wrapper.fill( """In this model {} the following biomass reactions were identified: {}""".format( len(ann["data"]), truncate(ann["data"]) ) ) assert outcome, ann["message"]
@pytest.mark.biomass @annotate( title="Biomass Consistency", format_type="number", data=dict(), message=dict(), metric=dict(),
[docs]) def test_biomass_consistency(model, reaction_id): """ Expect biomass components to sum up to 1 g[CDW]. This test only yields sensible results if all biomass precursor metabolites have chemical formulas assigned to them. The molecular weight of the biomass reaction in metabolic models is defined to be equal to 1 g/mmol. Conforming to this is essential in order to be able to reliably calculate growth yields, to cross-compare models, and to obtain valid predictions when simulating microbial consortia. A deviation from 1 - 1E-03 to 1 + 1E-06 is accepted. Implementation: Multiplies the coefficient of each metabolite of the biomass reaction with its molecular weight calculated from the formula, then divides the overall sum of all the products by 1000. """ ann = test_biomass_consistency.annotation reaction = model.reactions.get_by_id(reaction_id) try: ann["data"][reaction_id] = biomass.sum_biomass_weight(reaction) except TypeError: ann["data"][reaction_id] = None ann["message"][reaction_id] = wrapper.fill( """One or more of the biomass components do not have a defined formula or contain unspecified chemical groups.""" ) else: ann["message"][reaction_id] = wrapper.fill( """The component molar mass of the biomass reaction {} sums up to {} which is outside of the 1e-03 margin from 1 mmol / g[CDW] / h. """.format( reaction_id, ann["data"][reaction_id] ) ) outcome = (1 - 1e-03) < ann["data"][reaction_id] < (1 + 1e-06) ann["metric"][reaction_id] = 1.0 - float(outcome) # To account for numerical inaccuracies, a range from 1-1e0-3 to 1+1e-06 # is implemented in the assertion check assert outcome, ann["message"][reaction_id]
@pytest.mark.biomass @annotate( title="Biomass Production In Default Medium", format_type="number", data=dict(), message=dict(), metric=dict(),
[docs]) def test_biomass_default_production(model, reaction_id): """ Expect biomass production in default medium. Using flux balance analysis this test optimizes the model for growth in the medium that is set by default. Any non-zero growth rate is accepted to pass this test. Implementation: Calculate the solution of FBA with the biomass reaction set as objective function and the model's default constraints. """ ann = test_biomass_default_production.annotation ann["data"][reaction_id] = helpers.get_biomass_flux(model, reaction_id) outcome = ann["data"][reaction_id] > model.tolerance ann["metric"][reaction_id] = 1.0 - float(outcome) ann["message"][reaction_id] = wrapper.fill( """Using the biomass reaction {} this is the growth rate (1/h) that can be achieved when the model is simulated on the provided default medium: {} """.format( reaction_id, ann["data"][reaction_id] ) ) assert outcome, ann["message"][reaction_id]
@pytest.mark.biomass @annotate( title="Biomass Production In Complete Medium", format_type="number", data=dict(), message=dict(), metric=dict(),
[docs]) def test_biomass_open_production(model, reaction_id): """ Expect biomass production in complete medium. Using flux balance analysis this test optimizes the model for growth using a complete medium i.e. unconstrained boundary reactions. Any non-zero growth rate is accepted to pass this test. Implementation: Calculate the solution of FBA with the biomass reaction set as objective function and after removing any constraints from all boundary reactions. """ ann = test_biomass_open_production.annotation helpers.open_boundaries(model) ann["data"][reaction_id] = helpers.get_biomass_flux(model, reaction_id) outcome = ann["data"][reaction_id] > model.tolerance ann["metric"][reaction_id] = 1.0 - float(outcome) ann["message"][reaction_id] = wrapper.fill( """Using the biomass reaction {} this is the growth rate that can be achieved when the model is simulated on a complete medium i.e. with all the boundary reactions unconstrained: {} """.format( reaction_id, ann["data"][reaction_id] ) ) assert outcome, ann["message"][reaction_id]
@pytest.mark.biomass @annotate( title="Blocked Biomass Precursors In Default Medium", format_type="count", data=dict(), metric=dict(), message=dict(),
[docs]) def test_biomass_precursors_default_production(model, reaction_id): """ Expect production of all biomass precursors in default medium. Using flux balance analysis this test optimizes for the production of each metabolite that is a substrate of the biomass reaction with the exception of atp and h2o. Optimizations are carried out using the default conditions. This is useful when reconstructing the precursor biosynthesis pathways of a metabolic model. To pass this test, the model should be able to synthesis all the precursors. Implementation: For each biomass precursor (except ATP and H2O) add a temporary demand reaction, then carry out FBA with this reaction as the objective. Collect all metabolites for which this optimization is equal to zero or infeasible. """ ann = test_biomass_precursors_default_production.annotation reaction = model.reactions.get_by_id(reaction_id) ann["data"][reaction_id] = get_ids( biomass.find_blocked_biomass_precursors(reaction, model) ) ann["metric"][reaction_id] = len(ann["data"][reaction_id]) / len( biomass.find_biomass_precursors(model, reaction) ) ann["message"][reaction_id] = wrapper.fill( """Using the biomass reaction {} and when the model is simulated on the provided default medium a total of {} precursors ({:.2%} of all precursors except h2o and atp) cannot be produced: {} """.format( reaction_id, len(ann["data"][reaction_id]), ann["metric"][reaction_id], ann["data"][reaction_id], ) ) assert len(ann["data"][reaction_id]) == 0, ann["message"][reaction_id]
@pytest.mark.biomass @annotate( title="Blocked Biomass Precursors In Complete Medium", format_type="count", data=dict(), metric=dict(), message=dict(),
[docs]) def test_biomass_precursors_open_production(model, reaction_id): """ Expect precursor production in complete medium. Using flux balance analysis this test optimizes for the production of each metabolite that is a substrate of the biomass reaction with the exception of atp and h2o. Optimizations are carried out using a complete medium i.e. unconstrained boundary reactions. This is useful when reconstructing the precursor biosynthesis pathways of a metabolic model. To pass this test, the model should be able to synthesis all the precursors. Implementation: First remove any constraints from all boundary reactions, then for each biomass precursor (except ATP and H2O) add a temporary demand reaction, then carry out FBA with this reaction as the objective. Collect all metabolites for which this optimization is below or equal to zero or is infeasible. """ ann = test_biomass_precursors_open_production.annotation helpers.open_boundaries(model) reaction = model.reactions.get_by_id(reaction_id) ann["data"][reaction_id] = get_ids( biomass.find_blocked_biomass_precursors(reaction, model) ) ann["metric"][reaction_id] = len(ann["data"][reaction_id]) / len( biomass.find_biomass_precursors(model, reaction) ) ann["message"][reaction_id] = wrapper.fill( """Using the biomass reaction {} and when the model is simulated in complete medium a total of {} precursors ({:.2%} of all precursors except h2o and atp) cannot be produced: {} """.format( reaction_id, len(ann["data"][reaction_id]), ann["metric"][reaction_id], ann["data"][reaction_id], ) ) assert len(ann["data"][reaction_id]) == 0, ann["message"][reaction_id]
@pytest.mark.biomass @annotate( title="Growth-associated Maintenance in Biomass Reaction", format_type="raw", data=dict(), message=dict(), metric=dict(),
[docs]) def test_gam_in_biomass(model, reaction_id): """ Expect the biomass reactions to contain ATP and ADP. The growth-associated maintenance (GAM) term accounts for the energy in the form of ATP that is required to synthesize macromolecules such as Proteins, DNA and RNA, and other processes during growth. A GAM term is therefore a requirement for any well-defined biomass reaction. There are different ways to implement this term depending on what kind of experimental data is available and the preferred way of implementing the biomass reaction: - Chemostat growth experiments yield a single GAM value representing the required energy per gram of biomass (Figure 6 of [1]_). This can be implemented in a lumped biomass reaction or in the final term of a split biomass reaction. - Experimentally delineating or estimating the GAM requirements for each macromolecule separately is possible, yet requires either data from multi-omics experiments [2]_ or detailed resources [1]_ , respectively. Individual energy requirements can either be implemented in a split biomass equation on the term for each macromolecule, or, on the basis of the biomass composition, they can be summed into a single GAM value for growth and treated as mentioned above. This test is only able to detect if a lumped biomass reaction or the final term of a split biomass reaction contains this term. Hence, it will only detect the use of a single GAM value as opposed to individual energy requirements of each macromolecule. Both approaches, however, have its merits. Implementation: Determines the metabolite identifiers of ATP, ADP, H2O, HO4P and H+ based on an internal mapping table. Checks if ATP and H2O are a subset of the reactants and ADP, HO4P and H+ a subset of the products of the biomass reaction. References: .. [1] Thiele, I., & Palsson, B. Ø. (2010, January). A protocol for generating a high-quality genome-scale metabolic reconstruction. Nature protocols. Nature Publishing Group. http://doi.org/10.1038/nprot.2009.203 .. [2] Hackett, S. R., Zanotelli, V. R. T., Xu, W., Goya, J., Park, J. O., Perlman, D. H., Gibney, P. A., Botstein, D., Storey, J. D., Rabinowitz, J. D. (2010, January). Systems-level analysis of mechanisms regulating yeast metabolic flux Science http://doi.org/10.1126/science.aaf2786 """ ann = test_gam_in_biomass.annotation reaction = model.reactions.get_by_id(reaction_id) outcome = biomass.gam_in_biomass(model, reaction) ann["data"][reaction_id] = outcome ann["metric"][reaction_id] = 1.0 - float(outcome) if outcome: ann["message"][reaction_id] = wrapper.fill( """Yes, {} contains a term for growth-associated maintenance. """.format( reaction_id ) ) else: ann["message"][reaction_id] = wrapper.fill( """No, {} does not contain a term for growth-associated maintenance.""".format( reaction_id ) ) assert outcome, ann["message"][reaction_id]
@pytest.mark.biomass @annotate( title="Unrealistic Growth Rate In Default Medium", format_type="raw", data=dict(), message=dict(), metric=dict(),
[docs]) def test_fast_growth_default(model, reaction_id): """ Expect the predicted growth rate for each BOF to be below 2.81. The growth rate of a metabolic model should not be faster than that of the fastest growing organism. This is based on a doubling time of Vibrio natriegens which was reported to be 14.8 minutes by: Henry H. Lee, Nili Ostrov, Brandon G. Wong, Michaela A. Gold, Ahmad S. Khalil, George M. Church in https://www.biorxiv.org/content/biorxiv/early/2016/06/12/058487.full.pdf The calculation ln(2)/(14.8/60) ~ 2.81 yields the corresponding growth rate. Implementation: Calculate the solution of FBA with the biomass reaction set as objective function and a model's default constraints. Then check if the objective value is higher than 2.81. """ ann = test_fast_growth_default.annotation outcome = helpers.get_biomass_flux(model, reaction_id) > 2.81 ann["data"][reaction_id] = outcome ann["metric"][reaction_id] = 1.0 - float(outcome) if ann["data"][reaction_id]: ann["message"][reaction_id] = wrapper.fill( """Using the biomass reaction {} and when the model is simulated on the provided default medium the growth rate is *higher* than that of the fastest bacteria. This could be due to inconsistencies in the network or missing constraints.""".format( reaction_id ) ) else: ann["message"][reaction_id] = wrapper.fill( """Using the biomass reaction {} and when the model is simulated on the provided default medium the growth rate is *lower* than that of the fastest bacteria. This is to be expected for a majority of organisms.""".format( reaction_id ) ) assert outcome, ann["message"][reaction_id]
@pytest.mark.biomass @annotate( title="Ratio of Direct Metabolites in Biomass Reaction", format_type="percent", data=dict(), message=dict(), metric=dict(),
[docs]) def test_direct_metabolites_in_biomass(model, reaction_id): """ Expect the ratio of direct metabolites to be below 0.5. Some biomass precursors are taken from the media and directly consumed by the biomass reaction. It might not be a problem for ions or metabolites for which the organism in question is auxotrophic. However, too many of these metabolites may be artifacts of automated gap-filling procedures. Many gap-filling algorithms attempt to minimise the number of added reactions. This can lead to many biomass precursors being "direct metabolites". This test reports the ratio of direct metabolites to the total amount of precursors to a given biomass reaction. It specifically looks for metabolites that are only in either exchange, transport or biomass reactions. Bear in mind that this may lead to false positives in heavily compartimentalized models. To pass this test, the ratio of direct metabolites should be less than 50% of all biomass precursors. This is an arbitrary threshold but it takes into account that while certain ions do not serve a relevant metabolic function, it may still be important to include them in the biomass reaction to account for the impact of their uptake energy costs. This threshold is subject to change in the future. Implementation: Identify biomass precursors (excluding ATP and H+), identify cytosol and extracellular compartment from an internal mapping table. Then, determine which precursors is only involved in transport, boundary and biomass reactions. Using FBA with the biomass function as the objective then determine whether the metabolite is taken up only to be consumed by the biomass reaction. """ # TODO: Update the threshold as soon as we have an overview of the average! ann = test_direct_metabolites_in_biomass.annotation reaction = model.reactions.get_by_id(reaction_id) try: ann["data"][reaction_id] = get_ids( biomass.find_direct_metabolites(model, reaction) ) except OptimizationError: ann["data"][reaction_id] = [] ann["metric"][reaction_id] = 1.0 ann["message"][reaction_id] = "This model does not grow." pytest.skip(ann["message"]) ann["metric"][reaction_id] = len(ann["data"][reaction_id]) / len( biomass.find_biomass_precursors(model, reaction) ) ann["message"][reaction_id] = wrapper.fill( """{} contains a total of {} direct metabolites ({:.2%}). Specifically these are: {}. """.format( reaction_id, len(ann["data"][reaction_id]), ann["metric"][reaction_id], ann["data"][reaction_id], ) ) assert ann["metric"][reaction_id] < 0.5, ann["message"][reaction_id]
@pytest.mark.biomass @annotate( title="Number of Missing Essential Biomass Precursors", format_type="count", data=dict(), message=dict(), metric=dict(),
[docs]) def test_essential_precursors_not_in_biomass(model, reaction_id): """ Expect the biomass reaction to contain all essential precursors. There are universal components of life that make up the biomass of all known organisms. These include all proteinogenic amino acids, deoxy- and ribonucleotides, water and a range of metabolic cofactors. This test reports the amount of biomass precursors that have been reported to be essential constituents of the biomass equation. All of the following precursors need to be included in the biomass reaction to pass the test: Aminoacids: trp__L, cys__L, his__L, tyr__L, met__L, phe__L, ser__L, pro__L, asp__L, thr__L, gln__L, glu__L, ile__L, arg__L, lys__L, val__L, leu__L, ala__L, gly, asn__L DNA: datp, dctp, dttp, dgtp RNA: atp, ctp, utp, gtp Cofactors: nad, nadp, amet, fad, pydx5p, coa, thmpp, fmn and h2o These metabolites were selected based on the results presented by DOI:10.1016/j.ymben.2016.12.002 Please note, that the authors also suggest to count C1 carriers (derivatives of tetrahydrofolate(B9) or tetrahydromethanopterin) as universal cofactors. We have omitted these from this check because there are many individual compounds that classify as C1 carriers, and it is not clear a priori which one should be preferred. In a future update, we may consider identifying these using a chemical ontology. Implementation: Determine whether the model employs a lumped or split biomass reaction. Then, using an internal mapping table, try to identify the above list of essential precursors in list of precursor metabolites of either type of biomass reaction. List IDs in the models namespace if the metabolite exists, else use the MetaNetX namespace if the metabolite does not exist in the model. Identifies the cytosol from an internal mapping table, and assumes that all precursors exist in that compartment. """ ann = test_essential_precursors_not_in_biomass.annotation reaction = model.reactions.get_by_id(reaction_id) ann["data"][reaction_id] = [ m for m in biomass.essential_precursors_not_in_biomass(model, reaction) ] ann["metric"][reaction_id] = len(ann["data"][reaction_id]) / len( biomass.find_biomass_precursors(model, reaction) ) ann["message"][reaction_id] = wrapper.fill( """{} lacks a total of {} essential metabolites ({:.2%} of all biomass precursors). Specifically these are: {}.""".format( reaction_id, len(ann["data"][reaction_id]), ann["metric"][reaction_id], ann["data"][reaction_id], ) ) assert len(ann["data"][reaction_id]) == 0, ann["message"][reaction_id]