Source code for memote.support.consistency_helpers

# -*- coding: utf-8 -*-

# Copyright 2017 Novo Nordisk Foundation Center for Biosustainability,
# Technical University of Denmark.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Helper functions for stoichiometric consistency checks."""


import logging
from collections import defaultdict

import cobra
import numpy as np
import sympy
from numpy.linalg import svd
from optlang.symbolics import add
from pylru import lrudecorator
from six import iteritems, itervalues

from memote.support.helpers import find_biomass_reaction


__all__ = ("stoichiometry_matrix", "nullspace")

LOGGER = logging.getLogger(__name__)


def is_only_substrate(metabolite: cobra.Metabolite, reaction: cobra.Reaction) -> bool:
    """Determine if a metabolite is only a substrate of a reaction."""
    if reaction.reversibility:
        return False
    if reaction.get_coefficient(metabolite) < 0:
        return reaction.lower_bound >= 0.0 and reaction.upper_bound > 0
    else:
        return reaction.lower_bound < 0.0 and reaction.upper_bound <= 0


def is_only_product(metabolite: cobra.Metabolite, reaction: cobra.Reaction) -> bool:
    """Determine if a metabolite is only a product of a reaction."""
    if reaction.reversibility:
        return False
    if reaction.get_coefficient(metabolite) > 0:
        return reaction.lower_bound >= 0.0 and reaction.upper_bound > 0
    else:
        return reaction.lower_bound < 0.0 and reaction.upper_bound <= 0


def add_reaction_constraints(model, reactions, Constraint):
    """
    Add the stoichiometric coefficients as constraints.

    Parameters
    ----------
    model : optlang.Model
        The transposed stoichiometric matrix representation.
    reactions : iterable
        Container of `cobra.Reaction` instances.
    Constraint : optlang.Constraint
        The constraint class for the specific interface.

    """
    constraints = []
    for rxn in reactions:
        expression = add(
            [c * model.variables[m.id] for m, c in rxn.metabolites.items()]
        )
        constraints.append(Constraint(expression, lb=0, ub=0, name=rxn.id))
    model.add(constraints)


[docs]def stoichiometry_matrix(metabolites, reactions): """ Return the stoichiometry matrix representation of a set of reactions. The reactions and metabolites order is respected. All metabolites are expected to be contained and complete in terms of the reactions. Parameters ---------- reactions : iterable A somehow ordered list of unique reactions. metabolites : iterable A somehow ordered list of unique metabolites. Returns ------- numpy.array The 2D array that represents the stoichiometry matrix. dict A dictionary mapping metabolites to row indexes. dict A dictionary mapping reactions to column indexes. """ matrix = np.zeros((len(metabolites), len(reactions))) met_index = dict((met, i) for i, met in enumerate(metabolites)) rxn_index = dict() for i, rxn in enumerate(reactions): rxn_index[rxn] = i for met, coef in iteritems(rxn.metabolites): j = met_index[met] matrix[j, i] = coef return matrix, met_index, rxn_index
def rank(matrix, atol=1e-13, rtol=0): """ Estimate the rank, i.e., the dimension of the column space, of a matrix. The algorithm used by this function is based on the singular value decomposition of `stoichiometry_matrix`. Parameters ---------- matrix : ndarray The matrix should be at most 2-D. A 1-D array with length k will be treated as a 2-D with shape (1, k) atol : float The absolute tolerance for a zero singular value. Singular values smaller than ``atol`` are considered to be zero. rtol : float The relative tolerance for a zero singular value. Singular values less than the relative tolerance times the largest singular value are considered to be zero Notes ----- If both `atol` and `rtol` are positive, the combined tolerance is the maximum of the two; that is:: tol = max(atol, rtol * smax) Singular values smaller than ``tol`` are considered to be zero. Returns ------- int The estimated rank of the matrix. See Also -------- numpy.linalg.matrix_rank matrix_rank is basically the same as this function, but it does not provide the option of the absolute tolerance. """ matrix = np.atleast_2d(matrix) sigma = svd(matrix, compute_uv=False) tol = max(atol, rtol * sigma[0]) return int((sigma >= tol).sum())
[docs]def nullspace(matrix, atol=1e-13, rtol=0.0): """ Compute an approximate basis for the null space (kernel) of a matrix. The algorithm used by this function is based on the singular value decomposition of the given matrix. Parameters ---------- matrix : ndarray The matrix should be at most 2-D. A 1-D array with length k will be treated as a 2-D with shape (1, k) atol : float The absolute tolerance for a zero singular value. Singular values smaller than ``atol`` are considered to be zero. rtol : float The relative tolerance for a zero singular value. Singular values less than the relative tolerance times the largest singular value are considered to be zero. Notes ----- If both `atol` and `rtol` are positive, the combined tolerance is the maximum of the two; that is:: tol = max(atol, rtol * smax) Singular values smaller than ``tol`` are considered to be zero. Returns ------- ndarray If ``matrix`` is an array with shape (m, k), then the returned nullspace will be an array with shape ``(k, n)``, where n is the estimated dimension of the nullspace. References ---------- Adapted from: https://scipy.github.io/old-wiki/pages/Cookbook/RankNullspace.html """ # noqa: D402 matrix = np.atleast_2d(matrix) _, sigma, vh = svd(matrix) tol = max(atol, rtol * sigma[0]) num_nonzero = (sigma >= tol).sum() return vh[num_nonzero:].conj().T
@lrudecorator(size=2) def get_interface(model): """ Return the interface specific classes. Parameters ---------- model : cobra.Model The metabolic model under investigation. """ return ( model.solver.interface.Model, model.solver.interface.Constraint, model.solver.interface.Variable, model.solver.interface.Objective, ) @lrudecorator(size=2) def get_internals(model): """ Return non-boundary reactions and their metabolites. Boundary reactions are unbalanced by their nature. They are excluded here and only the metabolites of the others are considered. Parameters ---------- model : cobra.Model The metabolic model under investigation. """ biomass = set(find_biomass_reaction(model)) if len(biomass) == 0: LOGGER.warning( "No biomass reaction detected. Consistency test results " "are unreliable if one exists." ) return set(model.reactions) - (set(model.boundary) | biomass) def create_milp_problem(kernel, metabolites, Model, Variable, Constraint, Objective): """ Create the MILP as defined by equation (13) in [1]_. Parameters ---------- kernel : numpy.array A 2-dimensional array that represents the left nullspace of the stoichiometric matrix which is the nullspace of the transpose of the stoichiometric matrix. metabolites : iterable The metabolites in the nullspace. The length of this vector must equal the first dimension of the nullspace. Model : optlang.Model Model class for a specific optlang interface. Variable : optlang.Variable Variable class for a specific optlang interface. Constraint : optlang.Constraint Constraint class for a specific optlang interface. Objective : optlang.Objective Objective class for a specific optlang interface. References ---------- .. [1] Gevorgyan, A., M. G Poolman, and D. A Fell. "Detection of Stoichiometric Inconsistencies in Biomolecular Models." Bioinformatics 24, no. 19 (2008): 2245. """ assert ( len(metabolites) == kernel.shape[0] ), "metabolite vector and first nullspace dimension must be equal" ns_problem = Model() k_vars = list() for met in metabolites: # The element y[i] of the mass vector. y_var = Variable(met.id) k_var = Variable("k_{}".format(met.id), type="binary") k_vars.append(k_var) ns_problem.add([y_var, k_var]) # These following constraints are equivalent to 0 <= y[i] <= k[i]. ns_problem.add(Constraint(y_var - k_var, ub=0, name="switch_{}".format(met.id))) ns_problem.add(Constraint(y_var, lb=0, name="switch2_{}".format(met.id))) ns_problem.update() # add nullspace constraints for j, column in enumerate(kernel.T): expression = sympy.Add( *[ coef * ns_problem.variables[met.id] for (met, coef) in zip(metabolites, column) if coef != 0.0 ] ) constraint = Constraint(expression, lb=0, ub=0, name="ns_{}".format(j)) ns_problem.add(constraint) # The objective is to minimize the binary indicators k[i], subject to # the above inequality constraints. ns_problem.objective = Objective(1) ns_problem.objective.set_linear_coefficients({k_var: 1.0 for k_var in k_vars}) ns_problem.objective.direction = "min" return ns_problem, k_vars def add_cut(problem, indicators, bound, Constraint): """ Add an integer cut to the problem. Ensure that the same solution involving these indicator variables cannot be found by enforcing their sum to be less than before. Parameters ---------- problem : optlang.Model Specific optlang interface Model instance. indicators : iterable Binary indicator `optlang.Variable`s. bound : int Should be one less than the sum of indicators. Corresponds to P - 1 in equation (14) in [1]_. Constraint : optlang.Constraint Constraint class for a specific optlang interface. References ---------- .. [1] Gevorgyan, A., M. G Poolman, and D. A Fell. "Detection of Stoichiometric Inconsistencies in Biomolecular Models." Bioinformatics 24, no. 19 (2008): 2245. """ cut = Constraint(sympy.Add(*indicators), ub=bound) problem.add(cut) return cut def is_mass_balanced(reaction): """Confirm that a reaction is mass balanced.""" balance = defaultdict(int) for metabolite, coefficient in iteritems(reaction.metabolites): if metabolite.elements is None or len(metabolite.elements) == 0: return False for element, amount in iteritems(metabolite.elements): balance[element] += coefficient * amount return all(amount == 0 for amount in itervalues(balance)) def is_charge_balanced(reaction): """Confirm that a reaction is charge balanced.""" charge = 0 for metabolite, coefficient in iteritems(reaction.metabolites): if metabolite.charge is None: return False charge += coefficient * metabolite.charge return charge == 0