# Source code for memote.experimental.experimental_base

# Copyright 2018 Novo Nordisk Foundation Center for Biosustainability,
# Technical University of Denmark.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


"""Provide a class for medium definitions."""


import json
import logging


try:
    from importlib.resources import files
except ImportError:
    from importlib_resources import files

from goodtables import validate

# Importing the checks is necessary in order to register them.
import memote.experimental.schemata
from memote.experimental.checks import UnknownIdentifier  # noqa: F401
from memote.experimental.tabular import read_tabular


__all__ = ("ExperimentalBase",)


LOGGER = logging.getLogger(__name__)


class ExperimentalBase(object):
    """
    Represent a tabular experimental data file and its validation schema.

    The class reads a data table from ``filename``, loads the JSON schema
    named by ``SCHEMA`` from the ``memote.experimental.schemata`` package and
    validates the table against that schema.

    Attributes
    ----------
    SCHEMA : str or None
        File name of the JSON schema inside ``memote.experimental.schemata``.
        It is ``None`` here; presumably concrete subclasses override it
        (``load`` cannot locate a schema otherwise).
    TRUTHY : set of str
        String spellings collected as "true" values. Not used by this class
        itself; presumably consumed by subclasses when parsing boolean
        columns -- confirm against the subclasses.

    """

    SCHEMA = None
    TRUTHY = {"true", "True", "TRUE", "1", "yes", "Yes", "YES"}

    def __init__(self, identifier, obj, filename, **kwargs):
        """
        Initialize the instance without reading any data yet.

        Parameters
        ----------
        identifier : str
            Stored as ``self.id``.
        obj : dict
            Mapping that may contain a ``"label"`` entry. A missing label or
            an explicit ``None`` is stored as the empty string.
        filename : str or pathlib.Path
            The full file path. May be a compressed file.
        kwargs
            Passed on unchanged to the next ``__init__`` in the MRO.

        """
        super(ExperimentalBase, self).__init__(**kwargs)
        self.id = identifier
        label = obj.get("label")
        self.label = "" if label is None else label
        self.filename = filename
        # Both are populated by `load`.
        self.data = None
        self.schema = None

    def load(self, dtype_conversion=None):
        """
        Load the data table and corresponding validation schema.

        Parameters
        ----------
        dtype_conversion : dict, optional
            Column names as keys and corresponding type for loading the data.
            Please take a look at the `pandas documentation
            <https://pandas.pydata.org/pandas-docs/stable/io.html#specifying-column-data-types>`__
            for detailed explanations.

        """
        self.data = read_tabular(self.filename, dtype_conversion)
        # The schema ships as a package resource, so resolve it relative to
        # the schemata package rather than the working directory.
        schema_resource = files(memote.experimental.schemata).joinpath(
            self.SCHEMA
        )
        with schema_resource.open(mode="r", encoding="utf-8") as file_handle:
            self.schema = json.load(file_handle)

    def validate(self, model, checks=None):
        """
        Use a defined schema to validate the given table.

        `load` must have been called beforehand so that ``self.data`` and
        ``self.schema`` are populated.

        Parameters
        ----------
        model
            Not used by this implementation; presumably kept so that
            subclasses overriding this method share one signature.
        checks : list, optional
            Additional checks handed to the goodtables ``validate`` call.
            Defaults to an empty list.

        Raises
        ------
        ValueError
            If the validation report is not valid (see `evaluate_report`).

        """
        if checks is None:
            checks = []
        records = self.data.to_dict("records")
        if records:
            headers = list(records[0])
        else:
            # An empty table has no first record to take the headers from.
            # Fall back to the column labels (assumes ``self.data`` is a
            # pandas DataFrame, as the ``to_dict("records")`` call suggests)
            # instead of failing with an IndexError.
            headers = list(self.data.columns)
        report = validate(
            records,
            headers=headers,
            preset="table",
            schema=self.schema,
            order_fields=True,
            checks=checks,
        )
        self.evaluate_report(report)

    @staticmethod
    def evaluate_report(report):
        """
        Log the content of a validation report and fail if it is invalid.

        Parameters
        ----------
        report : dict
            Validation report with a ``"valid"`` flag, an optional
            ``"warnings"`` list and, when invalid, a ``"tables"`` list whose
            entries carry an ``"errors"`` list of dicts with a ``"message"``.

        Raises
        ------
        ValueError
            If ``report["valid"]`` is falsy. All error messages are logged
            before the exception is raised.

        """
        # Log warnings before checking validity so that they are not dropped
        # for a valid report (NOTE(review): goodtables appears to attach
        # warnings, e.g. about a reached row limit, to valid reports too).
        for warn in report.get("warnings", ()):
            LOGGER.warning(warn)
        if report["valid"]:
            return
        # We only ever test one table at a time, but iterating keeps an
        # invalid report without tables from masking the ValueError below
        # with an IndexError.
        for table in report["tables"]:
            for err in table["errors"]:
                LOGGER.error(err["message"])
        raise ValueError("Invalid data file. Please see errors above.")