# -*- coding: utf-8 -*-
# Copyright 2017 Novo Nordisk Foundation Center for Biosustainability,
# Technical University of Denmark.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Biomass tests performed on an instance of ``cobra.Model``.
N.B.: We parametrize each function here with the identified biomass reactions.
In the storage of test results we rely on the order of the biomass fixtures
to remain the same as the parametrized test cases.
"""
from __future__ import absolute_import, division
import logging
import pytest
from cobra.exceptions import OptimizationError
import memote.support.biomass as biomass
import memote.support.helpers as helpers
from memote.utils import annotate, get_ids, truncate, wrapper
# Module-level logger named after this module.
LOGGER = logging.getLogger(__name__)
@annotate(title="Biomass Reactions Identified", format_type="count")
def test_biomass_presence(model):
    """
    Expect the model to contain at least one biomass reaction.

    The biomass composition aka biomass formulation aka biomass reaction
    is a common pseudo-reaction accounting for biomass synthesis in
    constraints-based modelling. It describes the stoichiometry of
    intracellular compounds that are required for cell growth. While this
    reaction may not be relevant to modeling the metabolism of higher
    organisms, it is essential for single-cell modeling.

    Implementation:
    Identifies possible biomass reactions using two principal steps:

        1. Return reactions that include the SBO annotation "SBO:0000629" for
           biomass.

    If no reactions can be identified this way:

        1. Look for the ``buzzwords`` "biomass", "growth" and "bof" in reaction
           IDs.
        2. Look for metabolite IDs or names that contain the ``buzzword``
           "biomass" and obtain the set of reactions they are involved in.
        3. Remove boundary reactions from this set.
        4. Return the union of reactions that match the buzzwords and of the
           reactions that metabolites are involved in that match the buzzword.

    This test checks if at least one biomass reaction is present.

    """
    ann = test_biomass_presence.annotation
    # Collect the identifiers once and reuse them for data, metric and message.
    biomass_ids = [rxn.id for rxn in helpers.find_biomass_reaction(model)]
    ann["data"] = biomass_ids
    found_any = bool(biomass_ids)
    # A metric of 0.0 marks a pass, 1.0 a failure.
    ann["metric"] = 0.0 if found_any else 1.0
    template = (
        "In this model {} the following biomass reactions were\n"
        "identified: {}"
    )
    ann["message"] = wrapper.fill(
        template.format(len(biomass_ids), truncate(biomass_ids))
    )
    assert found_any, ann["message"]
@pytest.mark.biomass
@annotate(
    title="Biomass Consistency",
    format_type="number",
    data=dict(),
    message=dict(),
    metric=dict(),
)
def test_biomass_consistency(model, reaction_id):
    """
    Expect biomass components to sum up to 1 g[CDW].

    This test only yields sensible results if all biomass precursor
    metabolites have chemical formulas assigned to them.
    The molecular weight of the biomass reaction in metabolic models is
    defined to be equal to 1 g/mmol. Conforming to this is essential in order
    to be able to reliably calculate growth yields, to cross-compare models,
    and to obtain valid predictions when simulating microbial consortia. A
    deviation from 1 - 1E-03 to 1 + 1E-06 is accepted.

    Implementation:
    Multiplies the coefficient of each metabolite of the biomass reaction with
    its molecular weight calculated from the formula, then divides the overall
    sum of all the products by 1000.

    If the weight cannot be computed (``TypeError`` from the weight
    calculation), ``None`` is stored as data and the test fails with an
    explanatory message.

    """
    ann = test_biomass_consistency.annotation
    reaction = model.reactions.get_by_id(reaction_id)
    try:
        weight = biomass.sum_biomass_weight(reaction)
    except TypeError:
        weight = None
        ann["message"][reaction_id] = wrapper.fill(
            "One or more of the biomass components do not have a defined\n"
            "formula or contain unspecified chemical groups."
        )
    else:
        ann["message"][reaction_id] = wrapper.fill(
            (
                "The component molar mass of the biomass reaction {} sums up "
                "to {}\n"
                "which is outside of the 1e-03 margin from 1 mmol / g[CDW] / "
                "h.\n"
            ).format(reaction_id, weight)
        )
    ann["data"][reaction_id] = weight
    # To account for numerical inaccuracies, a range from 1 - 1e-03 to
    # 1 + 1e-06 is implemented in the assertion check. An undefined weight
    # (None) must count as a failure; comparing None with a float would raise
    # a TypeError on Python 3 and mask the message prepared above.
    outcome = weight is not None and (1 - 1e-03) < weight < (1 + 1e-06)
    ann["metric"][reaction_id] = 1.0 - float(outcome)
    assert outcome, ann["message"][reaction_id]
@pytest.mark.biomass
@annotate(
    title="Biomass Production In Default Medium",
    format_type="number",
    data=dict(),
    message=dict(),
    metric=dict(),
)
def test_biomass_default_production(model, reaction_id):
    """
    Expect biomass production in default medium.

    Using flux balance analysis this test optimizes the model for growth in
    the medium that is set by default. Any non-zero growth rate is accepted to
    pass this test.

    Implementation:
    Calculate the solution of FBA with the biomass reaction set as objective
    function and the model's default constraints.

    """
    ann = test_biomass_default_production.annotation
    growth_rate = helpers.get_biomass_flux(model, reaction_id)
    ann["data"][reaction_id] = growth_rate
    # Growth only counts when it exceeds the model's numerical tolerance.
    can_grow = growth_rate > model.tolerance
    ann["metric"][reaction_id] = 1.0 - float(can_grow)
    template = (
        "Using the biomass reaction {} this is the growth rate (1/h) that\n"
        "can be achieved when the model is simulated on the provided\n"
        "default medium: {}\n"
    )
    ann["message"][reaction_id] = wrapper.fill(
        template.format(reaction_id, growth_rate)
    )
    assert can_grow, ann["message"][reaction_id]
@pytest.mark.biomass
@annotate(
    title="Biomass Production In Complete Medium",
    format_type="number",
    data=dict(),
    message=dict(),
    metric=dict(),
)
def test_biomass_open_production(model, reaction_id):
    """
    Expect biomass production in complete medium.

    Using flux balance analysis this test optimizes the model for growth using
    a complete medium i.e. unconstrained boundary reactions. Any non-zero
    growth rate is accepted to pass this test.

    Implementation:
    Calculate the solution of FBA with the biomass reaction set as objective
    function and after removing any constraints from all boundary reactions.

    """
    ann = test_biomass_open_production.annotation
    # The boundaries have to be opened before the growth rate is computed.
    helpers.open_boundaries(model)
    growth_rate = helpers.get_biomass_flux(model, reaction_id)
    ann["data"][reaction_id] = growth_rate
    can_grow = growth_rate > model.tolerance
    ann["metric"][reaction_id] = 1.0 - float(can_grow)
    template = (
        "Using the biomass reaction {} this is the growth rate that can be\n"
        "achieved when the model is simulated on a complete medium i.e.\n"
        "with all the boundary reactions unconstrained: {}\n"
    )
    ann["message"][reaction_id] = wrapper.fill(
        template.format(reaction_id, growth_rate)
    )
    assert can_grow, ann["message"][reaction_id]
@pytest.mark.biomass
@annotate(
    title="Blocked Biomass Precursors In Default Medium",
    format_type="count",
    data=dict(),
    metric=dict(),
    message=dict(),
)
def test_biomass_precursors_default_production(model, reaction_id):
    """
    Expect production of all biomass precursors in default medium.

    Using flux balance analysis this test optimizes for the production of each
    metabolite that is a substrate of the biomass reaction with the exception
    of atp and h2o. Optimizations are carried out using the default
    conditions. This is useful when reconstructing the precursor biosynthesis
    pathways of a metabolic model. To pass this test, the model should be able
    to synthesize all the precursors.

    Implementation:
    For each biomass precursor (except ATP and H2O) add a temporary demand
    reaction, then carry out FBA with this reaction as the objective. Collect
    all metabolites for which this optimization is equal to zero or
    infeasible.

    The metric is the fraction of blocked precursors among all precursors.
    If the reaction has no precursors to count, the metric is 0.0 when
    nothing is blocked and 1.0 otherwise, instead of dividing by zero.

    """
    ann = test_biomass_precursors_default_production.annotation
    reaction = model.reactions.get_by_id(reaction_id)
    blocked = get_ids(biomass.find_blocked_biomass_precursors(reaction, model))
    ann["data"][reaction_id] = blocked
    total = len(biomass.find_biomass_precursors(model, reaction))
    # Guard against a biomass reaction without any countable precursors.
    if total:
        metric = len(blocked) / total
    else:
        metric = float(bool(blocked))
    ann["metric"][reaction_id] = metric
    template = (
        "Using the biomass reaction {} and when the model is simulated on the\n"
        "provided default medium a total of {} precursors\n"
        "({:.2%} of all precursors except h2o and atp) cannot be produced: {}\n"
    )
    ann["message"][reaction_id] = wrapper.fill(
        template.format(reaction_id, len(blocked), metric, blocked)
    )
    assert len(blocked) == 0, ann["message"][reaction_id]
@pytest.mark.biomass
@annotate(
    title="Blocked Biomass Precursors In Complete Medium",
    format_type="count",
    data=dict(),
    metric=dict(),
    message=dict(),
)
def test_biomass_precursors_open_production(model, reaction_id):
    """
    Expect precursor production in complete medium.

    Using flux balance analysis this test optimizes for the production of each
    metabolite that is a substrate of the biomass reaction with the exception
    of atp and h2o. Optimizations are carried out using a complete
    medium i.e. unconstrained boundary reactions. This is useful when
    reconstructing the precursor biosynthesis pathways of a metabolic model.
    To pass this test, the model should be able to synthesize all the
    precursors.

    Implementation:
    First remove any constraints from all boundary reactions, then for each
    biomass precursor (except ATP and H2O) add a temporary demand
    reaction, then carry out FBA with this reaction as the objective. Collect
    all metabolites for which this optimization is below or equal to zero or is
    infeasible.

    The metric is the fraction of blocked precursors among all precursors.
    If the reaction has no precursors to count, the metric is 0.0 when
    nothing is blocked and 1.0 otherwise, instead of dividing by zero.

    """
    ann = test_biomass_precursors_open_production.annotation
    # The boundaries have to be opened before precursors are checked.
    helpers.open_boundaries(model)
    reaction = model.reactions.get_by_id(reaction_id)
    blocked = get_ids(biomass.find_blocked_biomass_precursors(reaction, model))
    ann["data"][reaction_id] = blocked
    total = len(biomass.find_biomass_precursors(model, reaction))
    # Guard against a biomass reaction without any countable precursors.
    if total:
        metric = len(blocked) / total
    else:
        metric = float(bool(blocked))
    ann["metric"][reaction_id] = metric
    template = (
        "Using the biomass reaction {} and when the model is simulated in\n"
        "complete medium a total of {} precursors\n"
        "({:.2%} of all precursors except h2o and atp) cannot be produced: {}\n"
    )
    ann["message"][reaction_id] = wrapper.fill(
        template.format(reaction_id, len(blocked), metric, blocked)
    )
    assert len(blocked) == 0, ann["message"][reaction_id]
@pytest.mark.biomass
@annotate(
    title="Growth-associated Maintenance in Biomass Reaction",
    format_type="raw",
    data=dict(),
    message=dict(),
    metric=dict(),
)
def test_gam_in_biomass(model, reaction_id):
    """
    Expect the biomass reactions to contain ATP and ADP.

    The growth-associated maintenance (GAM) term accounts for the energy in
    the form of ATP that is required to synthesize macromolecules such as
    Proteins, DNA and RNA, and other processes during growth. A GAM term is
    therefore a requirement for any well-defined biomass reaction. There are
    different ways to implement this term depending on
    what kind of experimental data is available and the preferred
    way of implementing the biomass reaction:

    - Chemostat growth experiments yield a single GAM value representing the
      required energy per gram of biomass (Figure 6 of [1]_). This can be
      implemented in a lumped biomass reaction or in the final term of a split
      biomass reaction.
    - Experimentally delineating or estimating the GAM requirements
      for each macromolecule separately is possible, yet requires either
      data from multi-omics experiments [2]_ or detailed resources [1]_ ,
      respectively. Individual energy requirements can either be implemented
      in a split biomass equation on the term for each macromolecule, or, on
      the basis of the biomass composition, they can be summed into a single
      GAM value for growth and treated as mentioned above.

    This test is only able to detect if a lumped biomass reaction or the final
    term of a split biomass reaction contains this term. Hence, it will
    only detect the use of a single GAM value as opposed to individual energy
    requirements of each macromolecule. Both approaches, however, have
    their merits.

    Implementation:
    Determines the metabolite identifiers of ATP, ADP, H2O, HO4P and H+ based
    on an internal mapping table. Checks if ATP and H2O are a subset of the
    reactants and ADP, HO4P and H+ a subset of the products of the biomass
    reaction.

    References:
    .. [1] Thiele, I., & Palsson, B. Ø. (2010, January). A protocol for
          generating a high-quality genome-scale metabolic reconstruction.
          Nature protocols. Nature Publishing Group.
          http://doi.org/10.1038/nprot.2009.203

    .. [2] Hackett, S. R., Zanotelli, V. R. T., Xu, W., Goya, J., Park, J. O.,
          Perlman, D. H., Gibney, P. A., Botstein, D., Storey, J. D.,
          Rabinowitz, J. D. (2016). Systems-level analysis of
          mechanisms regulating yeast metabolic flux
          Science
          http://doi.org/10.1126/science.aaf2786

    """
    ann = test_gam_in_biomass.annotation
    reaction = model.reactions.get_by_id(reaction_id)
    has_gam = biomass.gam_in_biomass(model, reaction)
    ann["data"][reaction_id] = has_gam
    ann["metric"][reaction_id] = 1.0 - float(has_gam)
    # Pick the message template matching the outcome, then fill it in once.
    if has_gam:
        template = (
            "Yes, {} contains a term for growth-associated maintenance.\n"
        )
    else:
        template = (
            "No, {} does not contain a term for growth-associated\n"
            "maintenance."
        )
    ann["message"][reaction_id] = wrapper.fill(template.format(reaction_id))
    assert has_gam, ann["message"][reaction_id]
@pytest.mark.biomass
@annotate(
    title="Unrealistic Growth Rate In Default Medium",
    format_type="raw",
    data=dict(),
    message=dict(),
    metric=dict(),
)
def test_fast_growth_default(model, reaction_id):
    """
    Expect the predicted growth rate for each BOF to be below 2.81.

    The growth rate of a metabolic model should not be faster than that of the
    fastest growing organism. This is based on a doubling time of Vibrio
    natriegens which was reported to be 14.8 minutes by: Henry H. Lee, Nili
    Ostrov, Brandon G. Wong, Michaela A. Gold, Ahmad S. Khalil,
    George M. Church
    in https://www.biorxiv.org/content/biorxiv/early/2016/06/12/058487.full.pdf

    The calculation ln(2)/(14.8/60) ~ 2.81 yields the corresponding growth
    rate.

    Implementation:
    Calculate the solution of FBA with the biomass reaction set as objective
    function and a model's default constraints. Then check if the objective
    value is higher than 2.81. The stored data is ``True`` when the growth
    rate exceeds that threshold, in which case the test fails.

    """
    ann = test_fast_growth_default.annotation
    # True when the predicted growth rate is unrealistically high.
    too_fast = helpers.get_biomass_flux(model, reaction_id) > 2.81
    ann["data"][reaction_id] = too_fast
    # The test passes only when growth stays at or below the threshold;
    # previously the assertion and metric rewarded the too-fast case.
    outcome = not too_fast
    ann["metric"][reaction_id] = 1.0 - float(outcome)
    if too_fast:
        template = (
            "Using the biomass reaction {} and when the model is simulated on\n"
            "the provided default medium the growth rate is *higher* than that\n"
            "of the fastest bacteria.\n"
            "This could be due to inconsistencies in the network or missing\n"
            "constraints."
        )
    else:
        template = (
            "Using the biomass reaction {} and when the model is simulated on\n"
            "the provided default medium the growth rate is *lower* than that\n"
            "of the fastest bacteria. This is to be expected for\n"
            "a majority of organisms."
        )
    ann["message"][reaction_id] = wrapper.fill(template.format(reaction_id))
    assert outcome, ann["message"][reaction_id]
@pytest.mark.biomass
@annotate(
title="Ratio of Direct Metabolites in Biomass Reaction",
format_type="percent",
data=dict(),
message=dict(),
metric=dict(),
@pytest.mark.biomass
@annotate(
    title="Number of Missing Essential Biomass Precursors",
    format_type="count",
    data=dict(),
    message=dict(),
    metric=dict(),
)
def test_essential_precursors_not_in_biomass(model, reaction_id):
    """
    Expect the biomass reaction to contain all essential precursors.

    There are universal components of life that make up the biomass of all
    known organisms. These include all proteinogenic amino acids, deoxy- and
    ribonucleotides, water and a range of metabolic cofactors.

    This test reports the amount of biomass precursors that have been reported
    to be essential constituents of the biomass equation. All of the following
    precursors need to be included in the biomass reaction to pass the test:

    Aminoacids:
        trp__L, cys__L, his__L, tyr__L, met__L, phe__L, ser__L, pro__L, asp__L,
        thr__L, gln__L, glu__L, ile__L, arg__L, lys__L, val__L, leu__L, ala__L,
        gly, asn__L

    DNA: datp, dctp, dttp, dgtp

    RNA: atp, ctp, utp, gtp

    Cofactors: nad, nadp, amet, fad, pydx5p, coa, thmpp, fmn and h2o

    These metabolites were selected based on the results presented by
    DOI:10.1016/j.ymben.2016.12.002

    Please note, that the authors also suggest to count C1 carriers
    (derivatives of tetrahydrofolate(B9) or tetrahydromethanopterin) as
    universal cofactors. We have omitted these from this check because there
    are many individual compounds that classify as C1 carriers, and it is not
    clear a priori which one should be preferred. In a future update, we may
    consider identifying these using a chemical ontology.

    Implementation:
    Determine whether the model employs a lumped or split biomass reaction.
    Then, using an internal mapping table, try to identify the above list of
    essential precursors in list of precursor metabolites of either type of
    biomass reaction. List IDs in the models namespace if the metabolite
    exists, else use the MetaNetX namespace if the metabolite does not exist
    in the model. Identifies the cytosol from an internal mapping
    table, and assumes that all precursors exist in that compartment.

    The metric is the number of missing essential precursors divided by the
    number of biomass precursors. If the reaction has no precursors to count,
    the metric is 0.0 when nothing is missing and 1.0 otherwise, instead of
    dividing by zero.

    """
    ann = test_essential_precursors_not_in_biomass.annotation
    reaction = model.reactions.get_by_id(reaction_id)
    missing = list(biomass.essential_precursors_not_in_biomass(model, reaction))
    ann["data"][reaction_id] = missing
    total = len(biomass.find_biomass_precursors(model, reaction))
    # Guard against a biomass reaction without any countable precursors.
    if total:
        metric = len(missing) / total
    else:
        metric = float(bool(missing))
    ann["metric"][reaction_id] = metric
    template = (
        "{} lacks a total of {} essential metabolites\n"
        "({:.2%} of all biomass precursors). Specifically\n"
        "these are: {}."
    )
    ann["message"][reaction_id] = wrapper.fill(
        template.format(reaction_id, len(missing), metric, missing)
    )
    assert len(missing) == 0, ann["message"][reaction_id]