# -*- coding: utf-8 -*-
# Copyright 2017 Novo Nordisk Foundation Center for Biosustainability,
# Technical University of Denmark.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Perform basic tests on an instance of ``cobra.Model``."""
from __future__ import absolute_import, division
import memote.support.basic as basic
import memote.support.helpers as helpers
from memote.utils import annotate, get_ids, get_ids_and_bounds, truncate, wrapper
@annotate(title="Model Identifier", format_type="raw")
[docs]def test_model_id_presence(model):
"""
Expect that the model has an identifier.
The MIRIAM guidelines require a model to be identified via an ID.
Further, the ID will be displayed on the memote snapshot
report, which helps to distinguish the output clearly.
Implementation:
Check if the cobra.Model object has a non-empty "id"
attribute, this value is parsed from the "id" attribute of the <model> tag
in the SBML file e.g. <model fbc:strict="true" id="iJO1366">.
"""
ann = test_model_id_presence.annotation
assert hasattr(model, "id")
ann["data"] = model.id
ann["metric"] = 1.0 - float(bool(ann["data"]))
ann["message"] = "The model ID is {}".format(ann["data"])
assert bool(model.id)
@annotate(title="Total Genes", format_type="count")
[docs]def test_genes_presence(model):
"""
Expect that at least one gene is defined in the model.
A metabolic model can still be a useful tool without any
genes, however there are certain methods which rely on the presence of
genes and, more importantly, the corresponding gene-protein-reaction
rules. This test requires that there is at least one gene defined.
Implementation:
Check if the cobra.Model object has non-empty "genes"
attribute, this list is populated from the list of fbc:listOfGeneProducts
which should contain at least one fbc:geneProduct.
"""
ann = test_genes_presence.annotation
assert hasattr(model, "genes")
ann["data"] = get_ids(model.genes)
ann["message"] = "{:d} genes are defined in the model.".format(len(ann["data"]))
assert len(ann["data"]) >= 1, ann["message"]
@annotate(title="Total Reactions", format_type="count")
[docs]def test_reactions_presence(model):
"""
Expect that at least one reaction is defined in the model.
To be useful a metabolic model should consist at least of a few reactions.
This test simply checks if there are more than zero reactions.
Implementation:
Check if the cobra.Model object has non-empty "reactions"
attribute, this list is populated from the list of sbml:listOfReactions
which should contain at least one sbml:reaction.
"""
ann = test_reactions_presence.annotation
assert hasattr(model, "reactions")
ann["data"] = get_ids(model.reactions)
ann["message"] = "{:d} reactions are defined in the model.".format(len(ann["data"]))
assert len(ann["data"]) >= 1, ann["message"]
@annotate(title="Total Metabolites", format_type="count")
@annotate(title="Metabolites without Formula", format_type="count")
@annotate(title="Metabolites without Charge", format_type="count")
@annotate(title="Reactions without GPR", format_type="count")
[docs]def test_gene_protein_reaction_rule_presence(model):
"""
Expect all non-exchange reactions to have a GPR rule.
Gene-Protein-Reaction rules express which gene has what function.
The presence of this annotation is important to justify the existence
of reactions in the model, and is required to conduct in silico gene
deletion studies. However, reactions without GPR may also be valid:
Spontaneous reactions, or known reactions with yet undiscovered genes
likely lack GPR.
Implementation:
Check if each cobra.Reaction has a non-empty
"gene_reaction_rule" attribute, which is set by the parser if there is an
fbc:geneProductAssociation defined for the corresponding reaction in the
SBML.
"""
ann = test_gene_protein_reaction_rule_presence.annotation
missing_gpr_metabolic_rxns = set(
basic.check_gene_protein_reaction_rule_presence(model)
).difference(set(model.boundary))
ann["data"] = get_ids(missing_gpr_metabolic_rxns)
ann["metric"] = len(ann["data"]) / len(model.reactions)
ann["message"] = wrapper.fill(
"""There are a total of {} reactions ({:.2%}) without GPR:
{}""".format(
len(ann["data"]), ann["metric"], truncate(ann["data"])
)
)
assert len(ann["data"]) == 0, ann["message"]
@annotate(title="Non-Growth Associated Maintenance Reaction", format_type="count")
[docs]def test_ngam_presence(model):
"""
Expect a single non growth-associated maintenance reaction.
The Non-Growth Associated Maintenance reaction (NGAM) is an
ATP-hydrolysis reaction added to metabolic models to represent energy
expenses that the cell invests in continuous processes independent of
the growth rate. Memote tries to infer this reaction from a list of
buzzwords, and the stoichiometry and components of a simple ATP-hydrolysis
reaction.
Implementation:
From the list of all reactions that convert ATP to ADP select the reactions
that match the irreversible reaction "ATP + H2O -> ADP + HO4P + H+",
whose metabolites are situated within the main model compartment.
The main model compartment is assumed to be the cytosol, yet, if that
cannot be identified, it is assumed to be the compartment with the most
metabolites. The resulting list of reactions is then filtered further by
attempting to match the reaction name with any of the following buzzwords
('maintenance', 'atpm', 'requirement', 'ngam', 'non-growth', 'associated').
If this is possible only the filtered reactions are returned, if not the
list is returned as is.
"""
ann = test_ngam_presence.annotation
ann["data"] = get_ids(basic.find_ngam(model))
ann["metric"] = 1.0 - float(len(ann["data"]) == 1)
ann["message"] = wrapper.fill(
"""A total of {} NGAM reactions could be identified:
{}""".format(
len(ann["data"]), truncate(ann["data"])
)
)
assert len(ann["data"]) == 1, ann["message"]
@annotate(title="Metabolic Coverage", format_type="percent")
@annotate(title="Total Compartments", format_type="count")
[docs]def test_compartments_presence(model):
"""
Expect that two or more compartments are defined in the model.
While simplified metabolic models may be perfectly viable, generally
across the tree of life organisms contain at least one distinct
compartment: the cytosol or cytoplasm. In the case of prokaryotes there is
usually a periplasm, and eurkaryotes are more complex. In addition to the
internal compartment, a metabolic model also reflects the extracellular
environment i.e. the medium/ metabolic context in which the modelled cells
grow. Hence, in total, at least two compartments can be expected from a
metabolic model.
Implementation:
Check if the cobra.Model object has a non-empty "compartments"
attribute, this list is populated from the list of sbml:listOfCompartments
which should contain at least two sbml:compartment elements.
"""
ann = test_compartments_presence.annotation
assert hasattr(model, "compartments")
ann["data"] = list(model.compartments)
ann["metric"] = 1.0 - float(len(ann["data"]) >= 2)
ann["message"] = wrapper.fill(
"""A total of {:d} compartments are defined in the model: {}""".format(
len(ann["data"]), truncate(ann["data"])
)
)
assert len(ann["data"]) >= 2, ann["message"]
@annotate(title="Enzyme Complexes", format_type="count")
[docs]def test_protein_complex_presence(model):
"""
Expect that more than one enzyme complex is present in the model.
Based on the gene-protein-reaction (GPR) rules, it is possible to infer
whether a reaction is catalyzed by a single gene product, isozymes or by a
heteromeric protein complex. This test checks that at least one
such heteromeric protein complex is defined in any GPR of the model. For
S. cerevisiae it could be shown that "essential proteins tend to [cluster]
together in essential complexes"
(https://doi.org/10.1074%2Fmcp.M800490-MCP200).
This might also be a relevant metric for other organisms.
Implementation:
Identify GPRs which contain at least one logical AND that combines two
different gene products.
"""
ann = test_protein_complex_presence.annotation
ann["data"] = get_ids(basic.find_protein_complexes(model))
ann["metric"] = len(ann["data"]) / len(model.reactions)
ann["message"] = wrapper.fill(
"""A total of {:d} reactions are catalyzed by complexes defined
through GPR rules in the model.""".format(
len(ann["data"])
)
)
assert len(ann["data"]) >= 1, ann["message"]
@annotate(title="Purely Metabolic Reactions", format_type="count")
@annotate(title="Purely Metabolic Reactions with Constraints", format_type="count")
)
)
@annotate(title="Transport Reactions", format_type="count")
[docs]def test_find_transport_reactions(model):
"""
Expect >= 1 transport reactions are present in the model.
Cellular metabolism in any organism usually involves the transport of
metabolites across a lipid bi-layer. This test reports how many
of these reactions, which transports metabolites from one compartment
to another, are present in the model, as at least one transport reaction
must be present for cells to take up nutrients and/or excrete waste.
Implementation:
A transport reaction is defined as follows:
1. It contains metabolites from at least 2 compartments and
2. at least 1 metabolite undergoes no chemical reaction, i.e.,
the formula and/or annotation stays the same on both sides of the equation.
A notable exception is transport via PTS, which also contains the following
restriction:
3. The transported metabolite(s) are transported into a compartment through
the exchange of a phosphate.
An example of transport via PTS would be
pep(c) + glucose(e) -> glucose-6-phosphate(c) + pyr(c)
Reactions similar to transport via PTS (referred to as "modified transport
reactions") follow a similar pattern:
A(x) + B-R(y) -> A-R(y) + B(y)
Such modified transport reactions can be detected, but only when the
formula is defined for all metabolites in a particular reaction. If this
is not the case, transport reactions are identified through annotations,
which cannot detect modified transport reactions.
"""
ann = test_find_transport_reactions.annotation
ann["data"] = get_ids(helpers.find_transport_reactions(model))
ann["metric"] = len(ann["data"]) / len(model.reactions)
ann["message"] = wrapper.fill(
"""A total of {:d} ({:.2%}) transport reactions are defined in the
model, this excludes purely metabolic reactions, exchanges, or
pseudo-reactions: {}""".format(
len(ann["data"]), ann["metric"], truncate(ann["data"])
)
)
assert len(ann["data"]) >= 1, ann["message"]
@annotate(title="Transport Reactions with Constraints", format_type="count")
[docs]def test_find_constrained_transport_reactions(model):
"""
Expect zero or more transport reactions to have fixed constraints.
Cellular metabolism in any organism usually involves the transport of
metabolites across a lipid bi-layer. Hence, this test reports how many
of these reactions, which transports metabolites from one compartment
to another, have fixed constraints. This test does not have any mandatory
'pass' criteria.
Implementation:
Please refer to "Transport Reactions" for details on how memote identifies
transport reactions.
From the pool of transport reactions identify reactions which are
constrained to values other than the model's median lower and upper bounds.
"""
ann = test_find_constrained_transport_reactions.annotation
transporters = helpers.find_transport_reactions(model)
ann["data"] = get_ids_and_bounds(
[rxn for rxn in transporters if basic.is_constrained_reaction(model, rxn)]
)
ann["metric"] = len(ann["data"]) / len(transporters)
ann["message"] = wrapper.fill(
"""A total of {:d} ({:.2%}) transport reactions have fixed
constraints in the model: {}""".format(
len(ann["data"]), ann["metric"], truncate(ann["data"])
)
)
@annotate(title="Fraction of Transport Reactions without GPR", format_type="percent")
[docs]def test_transport_reaction_gpr_presence(model):
"""
Expect a small fraction of transport reactions not to have a GPR rule.
As it is hard to identify the exact transport processes within a cell,
transport reactions are often added purely for modeling purposes.
Highlighting where assumptions have been made versus where
there is proof may help direct the efforts to improve transport and
transport energetics of the tested metabolic model.
However, transport reactions without GPR may also be valid:
Diffusion, or known reactions with yet undiscovered genes likely lack GPR.
Implementation:
Check which cobra.Reactions classified as transport reactions have a
non-empty "gene_reaction_rule" attribute.
"""
# TODO: Update threshold with improved insight from meta study.
ann = test_transport_reaction_gpr_presence.annotation
ann["data"] = get_ids(basic.check_transport_reaction_gpr_presence(model))
ann["metric"] = len(ann["data"]) / len(helpers.find_transport_reactions(model))
ann["message"] = wrapper.fill(
"""There are a total of {} transport reactions ({:.2%} of all
transport reactions) without GPR:
{}""".format(
len(ann["data"]), ann["metric"], truncate(ann["data"])
)
)
assert ann["metric"] < 0.2, ann["message"]
@annotate(title="Number of Reversible Oxygen-Containing Reactions", format_type="count")
[docs]def test_find_reversible_oxygen_reactions(model):
"""
Expect zero or more oxygen-containing reactions to be reversible.
The directionality of oxygen-producing/-consuming reactions affects the
model's ability to grow anaerobically i.e. create faux-anaerobic organisms.
This test reports how many of these oxygen-containing reactions are
reversible. This test does not have any mandatory 'pass' criteria.
Implementation:
First, find the metabolite representing atmospheric oxygen in the model on
the basis of an internal mapping table or by specifically looking for the
formula "O2". Then, find all reactions that produce or consume oxygen and
report those that are reversible.
"""
ann = test_find_reversible_oxygen_reactions.annotation
o2_rxns = basic.find_oxygen_reactions(model)
ann["data"] = get_ids([rxn for rxn in o2_rxns if rxn.reversibility])
ann["metric"] = len(ann["data"]) / len(o2_rxns)
ann["message"] = wrapper.fill(
"""There are a total of {} reversible oxygen-containing reactions
({:.2%} of all oxygen-containing reactions): {}""".format(
len(ann["data"]), ann["metric"], truncate(ann["data"])
)
)
@annotate(title="Unique Metabolites", format_type="count")
@annotate(title="Duplicate Metabolites in Identical Compartments", format_type="count")
@annotate(title="Reactions With Partially Identical Annotations", format_type="percent")
[docs]def test_find_reactions_with_partially_identical_annotations(model):
"""
Expect there to be zero duplicate reactions.
Identify reactions in a pairwise manner that are annotated
with identical database references. This does not take into account a
reaction's directionality or compartment.
The main reason for having this test is to help cleaning up merged models
or models from automated reconstruction pipelines as these are prone to
having identical reactions with identifiers from different namespaces.
It could also be useful to identify a 'type' of reaction that
occurs in several compartments.
Implementation:
Identify duplicate reactions globally by checking if any
two metabolic reactions have the same entries in their annotation
attributes. The heuristic looks at annotations with the keys
"metanetx.reaction", "kegg.reaction", "brenda", "rhea", "biocyc",
"bigg.reaction" only.
"""
ann = test_find_reactions_with_partially_identical_annotations.annotation
duplicates, total = basic.find_reactions_with_partially_identical_annotations(model)
ann["data"] = duplicates
ann["metric"] = total / len(model.reactions)
ann["message"] = wrapper.fill(
"""Based on annotations there are {} different groups of overlapping
annotation which corresponds to a total of {} duplicated reactions in
the model.""".format(
len(duplicates), total
)
)
assert total == 0, ann["message"]
@annotate(title="Duplicate Reactions", format_type="percent")
[docs]def test_find_duplicate_reactions(model):
"""
Expect there to be zero duplicate reactions.
Identify reactions in a pairwise manner that use the same set
of metabolites including potentially duplicate metabolites. Moreover, it
will take a reaction's directionality and compartment into account.
The main reason for having this test is to help cleaning up merged models
or models from automated reconstruction pipelines as these are prone to
having identical reactions with identifiers from different namespaces.
Implementation:
Compare reactions in a pairwise manner.
For each reaction, the metabolite annotations are checked for a description
of the structure (via InChI and InChIKey).If they exist, substrates and
products as well as the stoichiometries of any reaction pair are compared.
Only reactions where the substrates, products, stoichiometry and
reversibility are identical are considered to be duplicates.
This test will not be able to identify duplicate reactions if there are no
structure annotations. Further, it will report reactions with
differing bounds as equal if they otherwise match the above conditions.
"""
ann = test_find_duplicate_reactions.annotation
duplicates, num = basic.find_duplicate_reactions(model)
ann["data"] = duplicates
ann["metric"] = num / len(model.reactions)
ann["message"] = wrapper.fill(
"""Based on metabolites, directionality and compartment there are a
total of {} reactions in the model which have duplicates:
{}""".format(
num, truncate(duplicates)
)
)
assert num == 0, ann["message"]
@annotate(title="Reactions With Identical Genes", format_type="percent")
[docs]def test_find_reactions_with_identical_genes(model):
"""
Expect there to be zero duplicate reactions.
Identify reactions in a pairwise manner that use identical
sets of genes. It does *not* take into account a reaction's directionality,
compartment, metabolites or annotations.
The main reason for having this test is to help cleaning up merged models
or models from automated reconstruction pipelines as these are prone to
having identical reactions with identifiers from different namespaces.
Implementation:
Compare reactions in a pairwise manner and group reactions whose genes
are identical. Skip reactions with missing genes.
"""
ann = test_find_reactions_with_identical_genes.annotation
rxn_groups, num_dup = basic.find_reactions_with_identical_genes(model)
ann["data"] = rxn_groups
ann["metric"] = num_dup / len(model.reactions)
ann["message"] = wrapper.fill(
"""Based only on equal genes there are {} different groups of
identical reactions which corresponds to a total of {}
duplicated reactions in the model.""".format(
len(rxn_groups), num_dup
)
)
assert num_dup == 0, ann["message"]
@annotate(title="Medium Components", format_type="count")
)
)