diff --git a/pymilo/chains/chain.py b/pymilo/chains/chain.py new file mode 100644 index 00000000..a2bcda3d --- /dev/null +++ b/pymilo/chains/chain.py @@ -0,0 +1,214 @@ +# -*- coding: utf-8 -*- +"""PyMilo Chain Module.""" + +from traceback import format_exc +from abc import ABC, abstractmethod + +from ..utils.util import get_sklearn_type +from ..transporters.transporter import Command +from ..exceptions.serialize_exception import PymiloSerializationException, SerializationErrorTypes +from ..exceptions.deserialize_exception import PymiloDeserializationException, DeserializationErrorTypes + + +class Chain(ABC): + """ + Chain Interface. + + Each Chain serializes/deserializes the given model. + """ + + @abstractmethod + def is_supported(self, model): + """ + Check if the given model is a sklearn's ML model supported by this chain. + + :param model: a string name of an ML model or a sklearn object of it + :type model: any object + :return: check result as bool + """ + + @abstractmethod + def transport(self, request, command, is_inner_model=False): + """ + Return the transported (serialized or deserialized) model. + + :param request: given ML model to be transported + :type request: any object + :param command: command to specify whether the request should be serialized or deserialized + :type command: transporter.Command + :param is_inner_model: determines whether it is an inner model of a super ML model + :type is_inner_model: boolean + :return: the transported request as a json string or sklearn ML model + """ + + @abstractmethod + def serialize(self, model): + """ + Return the serialized json string of the given model. + + :param model: given ML model to be get serialized + :type model: sklearn ML model + :return: the serialized json string of the given ML model + """ + + @abstractmethod + def deserialize(self, serialized_model, is_inner_model=False): + """ + Return the associated sklearn ML model of the given previously serialized ML model. + + :param serialized_model: given json string of a ML model to get deserialized to associated sklearn ML model + :type serialized_model: obj + :param is_inner_model: determines whether it is an inner ML model of a super ML model + :type is_inner_model: boolean + :return: associated sklearn ML model + """ + + @abstractmethod + def validate(self, model, command): + """ + Check if the provided inputs are valid in relation to each other. + + :param model: a sklearn ML model or a json string of it, serialized through the pymilo export + :type model: obj + :param command: command to specify whether the request should be serialized or deserialized + :type command: transporter.Command + :return: None + """ + + +class AbstractChain(Chain): + """Abstract Chain with the general implementation of the Chain interface.""" + + def __init__(self, transporters, supported_models): + """ + Initialize the AbstractChain instance. + + :param transporters: worker transporters dedicated to this chain + :type transporters: transporter.AbstractTransporter[] + :param supported_models: supported sklearn ML models belong to this chain + :type supported_models: dict + :return: an instance of the AbstractChain class + """ + self._transporters = transporters + self._supported_models = supported_models + + def is_supported(self, model): + """ + Check if the given model is a sklearn's ML model supported by this chain. + + :param model: a string name of an ML model or a sklearn object of it + :type model: any object + :return: check result as bool + """ + model_name = model if isinstance(model, str) else get_sklearn_type(model) + return model_name in self._supported_models + + def transport(self, request, command, is_inner_model=False): + """ + Return the transported (serialized or deserialized) model. + + :param request: given ML model to be transported + :type request: any object + :param command: command to specify whether the request should be serialized or deserialized + :type command: transporter.Command + :param is_inner_model: determines whether it is an inner model of a super ML model + :type is_inner_model: boolean + :return: the transported request as a json string or sklearn ML model + """ + if not is_inner_model: + self.validate(request, command) + + if command == Command.SERIALIZE: + try: + return self.serialize(request) + except Exception as e: + raise PymiloSerializationException( + { + 'error_type': SerializationErrorTypes.VALID_MODEL_INVALID_INTERNAL_STRUCTURE, + 'error': { + 'Exception': repr(e), + 'Traceback': format_exc(), + }, + 'object': request, + }) + + elif command == Command.DESERIALIZE: + try: + return self.deserialize(request, is_inner_model) + except Exception as e: + raise PymiloDeserializationException( + { + 'error_type': DeserializationErrorTypes.VALID_MODEL_INVALID_INTERNAL_STRUCTURE, + 'error': { + 'Exception': repr(e), + 'Traceback': format_exc()}, + 'object': request + }) + + def serialize(self, model): + """ + Return the serialized json string of the given model. + + :param model: given ML model to be get serialized + :type model: sklearn ML model + :return: the serialized json string of the given ML model + """ + for transporter in self._transporters: + self._transporters[transporter].transport(model, Command.SERIALIZE) + return model.__dict__ + + def deserialize(self, serialized_model, is_inner_model=False): + """ + Return the associated sklearn ML model of the given previously serialized ML model. + + :param serialized_model: given json string of a ML model to get deserialized to associated sklearn ML model + :type serialized_model: obj + :param is_inner_model: determines whether it is an inner ML model of a super ML model + :type is_inner_model: boolean + :return: associated sklearn ML model + """ + raw_model = None + data = None + if is_inner_model: + raw_model = self._supported_models[serialized_model["type"]]() + data = serialized_model["data"] + else: + raw_model = self._supported_models[serialized_model.type]() + data = serialized_model.data + for transporter in self._transporters: + self._transporters[transporter].transport( + serialized_model, Command.DESERIALIZE, is_inner_model) + for item in data: + setattr(raw_model, item, data[item]) + return raw_model + + def validate(self, model, command): + """ + Check if the provided inputs are valid in relation to each other. + + :param model: a sklearn ML model or a json string of it, serialized through the pymilo export + :type model: obj + :param command: command to specify whether the request should be serialized or deserialized + :type command: transporter.Command + :return: None + """ + if command == Command.SERIALIZE: + if self.is_supported(model): + return + else: + raise PymiloSerializationException( + { + 'error_type': SerializationErrorTypes.INVALID_MODEL, + 'object': model + } + ) + elif command == Command.DESERIALIZE: + if self.is_supported(model.type): + return + else: + raise PymiloDeserializationException( + { + 'error_type': DeserializationErrorTypes.INVALID_MODEL, + 'object': model + } + ) diff --git a/pymilo/chains/clustering_chain.py b/pymilo/chains/clustering_chain.py index 81e8d47c..e4c336a4 100644 --- a/pymilo/chains/clustering_chain.py +++ b/pymilo/chains/clustering_chain.py @@ -1,20 +1,13 @@ # -*- coding: utf-8 -*- -"""PyMilo chain for clustering models.""" -from ..transporters.transporter import Command +"""PyMilo chain for Clustering models.""" -from ..transporters.general_data_structure_transporter import GeneralDataStructureTransporter -from ..transporters.function_transporter import FunctionTransporter +from ..chains.chain import AbstractChain +from ..pymilo_param import SKLEARN_CLUSTERING_TABLE, NOT_SUPPORTED from ..transporters.cfnode_transporter import CFNodeTransporter +from ..transporters.function_transporter import FunctionTransporter +from ..transporters.general_data_structure_transporter import GeneralDataStructureTransporter from ..transporters.preprocessing_transporter import PreprocessingTransporter -from ..utils.util import get_sklearn_type - -from ..pymilo_param import SKLEARN_CLUSTERING_TABLE, NOT_SUPPORTED -from ..exceptions.serialize_exception import PymiloSerializationException, SerializationErrorTypes -from ..exceptions.deserialize_exception import PymiloDeserializationException, DeserializationErrorTypes -from traceback import format_exc - -bisecting_kmeans_support = SKLEARN_CLUSTERING_TABLE["BisectingKMeans"] != NOT_SUPPORTED CLUSTERING_CHAIN = { "PreprocessingTransporter": PreprocessingTransporter(), "GeneralDataStructureTransporter": GeneralDataStructureTransporter(), @@ -22,137 +15,10 @@ "CFNodeTransporter": CFNodeTransporter(), } -if bisecting_kmeans_support: - from ..transporters.randomstate_transporter import RandomStateTransporter +if SKLEARN_CLUSTERING_TABLE["BisectingKMeans"] != NOT_SUPPORTED: from ..transporters.bisecting_tree_transporter import BisectingTreeTransporter + from ..transporters.randomstate_transporter import RandomStateTransporter CLUSTERING_CHAIN["RandomStateTransporter"] = RandomStateTransporter() CLUSTERING_CHAIN["BisectingTreeTransporter"] = BisectingTreeTransporter() - -def is_clusterer(model): - """ - Check if the input model is a sklearn's clustering model. - - :param model: is a string name of a clusterer or a sklearn object of it - :type model: any object - :return: check result as bool - """ - if isinstance(model, str): - return model in SKLEARN_CLUSTERING_TABLE - else: - return get_sklearn_type(model) in SKLEARN_CLUSTERING_TABLE.keys() - - -def transport_clusterer(request, command, is_inner_model=False): - """ - Return the transported (Serialized or Deserialized) model. - - :param request: given clusterer to be transported - :type request: any object - :param command: command to specify whether the request should be serialized or deserialized - :type command: transporter.Command - :param is_inner_model: determines whether it is an inner model of a super ml model - :type is_inner_model: boolean - :return: the transported request as a json string or sklearn clustering model - """ - if not is_inner_model: - _validate_input(request, command) - - if command == Command.SERIALIZE: - try: - return serialize_clusterer(request) - except Exception as e: - raise PymiloSerializationException( - { - 'error_type': SerializationErrorTypes.VALID_MODEL_INVALID_INTERNAL_STRUCTURE, - 'error': { - 'Exception': repr(e), - 'Traceback': format_exc(), - }, - 'object': request, - }) - - elif command == Command.DESERIALIZE: - try: - return deserialize_clusterer(request, is_inner_model) - except Exception as e: - raise PymiloDeserializationException( - { - 'error_type': SerializationErrorTypes.VALID_MODEL_INVALID_INTERNAL_STRUCTURE, - 'error': { - 'Exception': repr(e), - 'Traceback': format_exc()}, - 'object': request}) - - -def serialize_clusterer(clusterer_object): - """ - Return the serialized json string of the given clustering model. - - :param clusterer_object: given model to be get serialized - :type clusterer_object: any sklearn clustering model - :return: the serialized json string of the given clusterer - """ - for transporter in CLUSTERING_CHAIN: - CLUSTERING_CHAIN[transporter].transport( - clusterer_object, Command.SERIALIZE) - return clusterer_object.__dict__ - - -def deserialize_clusterer(clusterer, is_inner_model=False): - """ - Return the associated sklearn clustering model of the given clusterer. - - :param clusterer: given json string of a clustering model to get deserialized to associated sklearn clustering model - :type clusterer: obj - :param is_inner_model: determines whether it is an inner model of a super ml model - :type is_inner_model: boolean - :return: associated sklearn clustering model - """ - raw_model = None - data = None - if is_inner_model: - raw_model = SKLEARN_CLUSTERING_TABLE[clusterer["type"]]() - data = clusterer["data"] - else: - raw_model = SKLEARN_CLUSTERING_TABLE[clusterer.type]() - data = clusterer.data - - for transporter in CLUSTERING_CHAIN: - CLUSTERING_CHAIN[transporter].transport( - clusterer, Command.DESERIALIZE, is_inner_model) - for item in data: - setattr(raw_model, item, data[item]) - return raw_model - - -def _validate_input(model, command): - """ - Check if the provided inputs are valid in relation to each other. - - :param model: a sklearn clusterer model or a json string of it, serialized through the pymilo export. - :type model: obj - :param command: command to specify whether the request should be serialized or deserialized - :type command: transporter.Command - :return: None - """ - if command == Command.SERIALIZE: - if is_clusterer(model): - return - else: - raise PymiloSerializationException( - { - 'error_type': SerializationErrorTypes.INVALID_MODEL, - 'object': model - } - ) - elif command == Command.DESERIALIZE: - if is_clusterer(model.type): - return - else: - raise PymiloDeserializationException( - { - 'error_type': DeserializationErrorTypes.INVALID_MODEL, - 'object': model - } - ) +clustering_chain = AbstractChain(CLUSTERING_CHAIN, SKLEARN_CLUSTERING_TABLE) diff --git a/pymilo/chains/cross_decomposition_chain.py b/pymilo/chains/cross_decomposition_chain.py index b2488ed6..e29e5325 100644 --- a/pymilo/chains/cross_decomposition_chain.py +++ b/pymilo/chains/cross_decomposition_chain.py @@ -1,148 +1,15 @@ # -*- coding: utf-8 -*- -"""PyMilo chain for cross decomposition models.""" -from ..transporters.transporter import Command +"""PyMilo chain for Cross Decomposition models.""" +from ..chains.chain import AbstractChain +from ..pymilo_param import SKLEARN_CROSS_DECOMPOSITION_TABLE from ..transporters.general_data_structure_transporter import GeneralDataStructureTransporter from ..transporters.preprocessing_transporter import PreprocessingTransporter -from ..pymilo_param import SKLEARN_CROSS_DECOMPOSITION_TABLE -from ..exceptions.serialize_exception import PymiloSerializationException, SerializationErrorTypes -from ..exceptions.deserialize_exception import PymiloDeserializationException, DeserializationErrorTypes - -from ..utils.util import get_sklearn_type - -from traceback import format_exc - -CROSS_DECOMPOSITION_CHAIN = { - "PreprocessingTransporter": PreprocessingTransporter(), - "GeneralDataStructureTransporter": GeneralDataStructureTransporter(), -} - - -def is_cross_decomposition(model): - """ - Check if the input model is a sklearn's cross decomposition model. - - :param model: is a string name of a cross decomposition or a sklearn object of it - :type model: any object - :return: check result as bool - """ - if isinstance(model, str): - return model in SKLEARN_CROSS_DECOMPOSITION_TABLE - else: - return get_sklearn_type(model) in SKLEARN_CROSS_DECOMPOSITION_TABLE.keys() - - -def transport_cross_decomposition(request, command, is_inner_model=False): - """ - Return the transported (Serialized or Deserialized) model. - - :param request: given cross decomposition model to be transported - :type request: any object - :param command: command to specify whether the request should be serialized or deserialized - :type command: transporter.Command - :param is_inner_model: determines whether it is an inner model of a super ml model - :type is_inner_model: boolean - :return: the transported request as a json string or sklearn cross decomposition model - """ - if not is_inner_model: - _validate_input(request, command) - - if command == Command.SERIALIZE: - try: - return serialize_cross_decomposition(request) - except Exception as e: - raise PymiloSerializationException( - { - 'error_type': SerializationErrorTypes.VALID_MODEL_INVALID_INTERNAL_STRUCTURE, - 'error': { - 'Exception': repr(e), - 'Traceback': format_exc(), - }, - 'object': request, - }) - - elif command == Command.DESERIALIZE: - try: - return deserialize_cross_decomposition(request, is_inner_model) - except Exception as e: - raise PymiloDeserializationException( - { - 'error_type': SerializationErrorTypes.VALID_MODEL_INVALID_INTERNAL_STRUCTURE, - 'error': { - 'Exception': repr(e), - 'Traceback': format_exc()}, - 'object': request}) - - -def serialize_cross_decomposition(cross_decomposition_object): - """ - Return the serialized json string of the given cross decomposition model. - - :param cross_decomposition_object: given model to be get serialized - :type cross_decomposition_object: any sklearn cross decomposition model - :return: the serialized json string of the given cross decomposition model - """ - for transporter in CROSS_DECOMPOSITION_CHAIN: - CROSS_DECOMPOSITION_CHAIN[transporter].transport( - cross_decomposition_object, Command.SERIALIZE) - return cross_decomposition_object.__dict__ - - -def deserialize_cross_decomposition(cross_decomposition, is_inner_model=False): - """ - Return the associated sklearn cross decomposition model. - - :param cross_decomposition: given json string of a cross decomposition model to get deserialized to associated sklearn cross decomposition model - :type cross_decomposition: obj - :param is_inner_model: determines whether it is an inner linear model of a super ml model - :type is_inner_model: boolean - :return: associated sklearn cross decomposition model - """ - raw_model = None - data = None - if is_inner_model: - raw_model = SKLEARN_CROSS_DECOMPOSITION_TABLE[cross_decomposition["type"]]() - data = cross_decomposition["data"] - else: - raw_model = SKLEARN_CROSS_DECOMPOSITION_TABLE[cross_decomposition.type]() - data = cross_decomposition.data - - for transporter in CROSS_DECOMPOSITION_CHAIN: - CROSS_DECOMPOSITION_CHAIN[transporter].transport( - cross_decomposition, Command.DESERIALIZE, is_inner_model) - for item in data: - setattr(raw_model, item, data[item]) - return raw_model - - -def _validate_input(model, command): - """ - Check if the provided inputs are valid in relation to each other. - - :param model: a sklearn cross decomposition model or a json string of it, serialized through the pymilo export. - :type model: obj - :param command: command to specify whether the request should be serialized or deserialized - :type command: transporter.Command - :return: None - """ - if command == Command.SERIALIZE: - if is_cross_decomposition(model): - return - else: - raise PymiloSerializationException( - { - 'error_type': SerializationErrorTypes.INVALID_MODEL, - 'object': model - } - ) - elif command == Command.DESERIALIZE: - if is_cross_decomposition(model.type): - return - else: - raise PymiloDeserializationException( - { - 'error_type': DeserializationErrorTypes.INVALID_MODEL, - 'object': model - } - ) +cross_decomposition_chain = AbstractChain( + { + "PreprocessingTransporter": PreprocessingTransporter(), + "GeneralDataStructureTransporter": GeneralDataStructureTransporter(), + }, + SKLEARN_CROSS_DECOMPOSITION_TABLE, +) diff --git a/pymilo/chains/decision_tree_chain.py b/pymilo/chains/decision_tree_chain.py index 2b47d6cf..89ea8552 100644 --- a/pymilo/chains/decision_tree_chain.py +++ b/pymilo/chains/decision_tree_chain.py @@ -1,153 +1,19 @@ # -*- coding: utf-8 -*- -"""PyMilo chain for decision trees.""" -from ..transporters.transporter import Command +"""PyMilo chain for Decision Trees models.""" +from ..chains.chain import AbstractChain +from ..pymilo_param import SKLEARN_DECISION_TREE_TABLE from ..transporters.general_data_structure_transporter import GeneralDataStructureTransporter -from ..transporters.tree_transporter import TreeTransporter -from ..transporters.randomstate_transporter import RandomStateTransporter from ..transporters.preprocessing_transporter import PreprocessingTransporter +from ..transporters.randomstate_transporter import RandomStateTransporter +from ..transporters.tree_transporter import TreeTransporter -from ..utils.util import get_sklearn_type - -from ..pymilo_param import SKLEARN_DECISION_TREE_TABLE - -from ..exceptions.serialize_exception import PymiloSerializationException, SerializationErrorTypes -from ..exceptions.deserialize_exception import PymiloDeserializationException, DeserializationErrorTypes -from traceback import format_exc - - -DECISION_TREE_CHAIN = { - "PreprocessingTransporter": PreprocessingTransporter(), - "GeneralDataStructureTransporter": GeneralDataStructureTransporter(), - "RandomStateTransporter": RandomStateTransporter(), - "TreeTransporter": TreeTransporter(), -} - - -def is_decision_tree(model): - """ - Check if the input model is a sklearn's decision tree. - - :param model: is a string name of a decision tree or a sklearn object of it - :type model: any object - :return: check result as bool - """ - if isinstance(model, str): - return model in SKLEARN_DECISION_TREE_TABLE - else: - return get_sklearn_type(model) in SKLEARN_DECISION_TREE_TABLE.keys() - - -def transport_decision_tree(request, command, is_inner_model=False): - """ - Return the transported (Serialized or Deserialized) model. - - :param request: given decision tree model to be transported - :type request: any object - :param command: command to specify whether the request should be serialized or deserialized - :type command: transporter.Command - :param is_inner_model: determines whether it is an inner model of a super ml model - :type is_inner_model: boolean - :return: the transported request as a json string or sklearn decision tree model - """ - if not is_inner_model: - _validate_input(request, command) - - if command == Command.SERIALIZE: - try: - return serialize_decision_tree(request) - except Exception as e: - raise PymiloSerializationException( - { - 'error_type': SerializationErrorTypes.VALID_MODEL_INVALID_INTERNAL_STRUCTURE, - 'error': { - 'Exception': repr(e), - 'Traceback': format_exc(), - }, - 'object': request, - }) - - elif command == Command.DESERIALIZE: - try: - return deserialize_decision_tree(request, is_inner_model) - except Exception as e: - raise PymiloDeserializationException( - { - 'error_type': SerializationErrorTypes.VALID_MODEL_INVALID_INTERNAL_STRUCTURE, - 'error': { - 'Exception': repr(e), - 'Traceback': format_exc()}, - 'object': request}) - - -def serialize_decision_tree(decision_tree_object): - """ - Return the serialized json string of the given decision tree model. - - :param decision_tree_object: given model to be get serialized - :type decision_tree_object: any sklearn decision tree model - :return: the serialized json string of the given decision tree model - """ - for transporter in DECISION_TREE_CHAIN: - DECISION_TREE_CHAIN[transporter].transport( - decision_tree_object, Command.SERIALIZE) - return decision_tree_object.__dict__ - - -def deserialize_decision_tree(decision_tree, is_inner_model=False): - """ - Return the associated sklearn decision tree model of the given decision_tree. - - :param decision_tree: given json string of a decision tree model to get deserialized to associated sklearn decision tree model - :type decision_tree: obj - :param is_inner_model: determines whether it is an inner model of a super ml model - :type is_inner_model: boolean - :return: associated sklearn decision tree model - """ - raw_model = None - data = None - if is_inner_model: - raw_model = SKLEARN_DECISION_TREE_TABLE[decision_tree["type"]]() - data = decision_tree["data"] - else: - raw_model = SKLEARN_DECISION_TREE_TABLE[decision_tree.type]() - data = decision_tree.data - - for transporter in DECISION_TREE_CHAIN: - DECISION_TREE_CHAIN[transporter].transport( - decision_tree, Command.DESERIALIZE, is_inner_model) - for item in data: - setattr(raw_model, item, data[item]) - return raw_model - - -def _validate_input(model, command): - """ - Check if the provided inputs are valid in relation to each other. - - :param model: a sklearn decision tree model or a json string of it, serialized through the pymilo export. - :type model: obj - :param command: command to specify whether the request should be serialized or deserialized - :type command: transporter.Command - :return: None - """ - if command == Command.SERIALIZE: - if is_decision_tree(model): - return - else: - raise PymiloSerializationException( - { - 'error_type': SerializationErrorTypes.INVALID_MODEL, - 'object': model - } - ) - elif command == Command.DESERIALIZE: - if is_decision_tree(model.type): - return - else: - raise PymiloDeserializationException( - { - 'error_type': DeserializationErrorTypes.INVALID_MODEL, - 'object': model - } - ) +decision_trees_chain = AbstractChain( + { + "PreprocessingTransporter": PreprocessingTransporter(), + "GeneralDataStructureTransporter": GeneralDataStructureTransporter(), + "RandomStateTransporter": RandomStateTransporter(), + "TreeTransporter": TreeTransporter(), + }, + SKLEARN_DECISION_TREE_TABLE, +) diff --git a/pymilo/chains/ensemble_chain.py b/pymilo/chains/ensemble_chain.py index 759bd171..3192d85e 100644 --- a/pymilo/chains/ensemble_chain.py +++ b/pymilo/chains/ensemble_chain.py @@ -1,30 +1,25 @@ # -*- coding: utf-8 -*- """PyMilo chain for ensemble models.""" + +import copy +from ast import literal_eval + +from numpy import ndarray, asarray + +from ..chains.chain import AbstractChain +from ..transporters.binmapper_transporter import BinMapperTransporter +from ..transporters.bunch_transporter import BunchTransporter from ..transporters.transporter import Command from ..transporters.general_data_structure_transporter import GeneralDataStructureTransporter -from ..transporters.randomstate_transporter import RandomStateTransporter -from ..transporters.lossfunction_transporter import LossFunctionTransporter -from ..transporters.bunch_transporter import BunchTransporter from ..transporters.generator_transporter import GeneratorTransporter -from ..transporters.treepredictor_transporter import TreePredictorTransporter -from ..transporters.binmapper_transporter import BinMapperTransporter +from ..transporters.lossfunction_transporter import LossFunctionTransporter from ..transporters.preprocessing_transporter import PreprocessingTransporter - +from ..transporters.randomstate_transporter import RandomStateTransporter +from ..transporters.treepredictor_transporter import TreePredictorTransporter from ..pymilo_param import SKLEARN_ENSEMBLE_TABLE - +from ..utils.util import check_str_in_iterable, get_sklearn_type from .util import get_concrete_transporter -from ..exceptions.serialize_exception import PymiloSerializationException, SerializationErrorTypes -from ..exceptions.deserialize_exception import PymiloDeserializationException, DeserializationErrorTypes - -from ..utils.util import get_sklearn_type, check_str_in_iterable - -from numpy import ndarray, asarray -from traceback import format_exc -from ast import literal_eval - -import copy - ENSEMBLE_CHAIN = { "PreprocessingTransporter": PreprocessingTransporter(), "GeneralDataStructureTransporter": GeneralDataStructureTransporter(), @@ -37,6 +32,146 @@ } +class EnsembleModelChain(AbstractChain): + """EnsembleModelChain developed to handle sklearn Ensemble ML model transportation.""" + + def serialize(self, ensemble_object): + """ + Return the serialized json string of the given ensemble model. + + :param ensemble_object: given model to be get serialized + :type ensemble_object: any sklearn ensemble model + :return: the serialized json string of the given ensemble + """ + for transporter in self._transporters: + if transporter != "GeneralDataStructureTransporter": + self._transporters[transporter].transport( + ensemble_object, Command.SERIALIZE) + + for key, value in ensemble_object.__dict__.items(): + if isinstance(value, list): + has_inner_tuple_with_ml_model = False + pt = PreprocessingTransporter() + for idx, item in enumerate(value): + if isinstance(item, tuple): + listed_tuple = list(item) + for inner_idx, inner_item in enumerate(listed_tuple): + if pt.is_preprocessing_module(inner_item): + listed_tuple[inner_idx] = pt.serialize_pre_module(inner_item) + else: + has_inner_model, result = serialize_possible_ml_model(inner_item) + if has_inner_model: + has_inner_tuple_with_ml_model = True + listed_tuple[inner_idx] = result + value[idx] = listed_tuple + else: + value[idx] = serialize_possible_ml_model(item)[1] + if has_inner_tuple_with_ml_model: + ensemble_object.__dict__[key] = { + "pymiloed-data-structure": "list of (str, estimator) tuples", + "pymiloed-data": value, + } + + elif isinstance(value, dict): + if check_str_in_iterable("pymilo-bunch", value): + new_value = {} + for inner_key, inner_value in value["pymilo-bunch"].items(): + new_value[inner_key] = serialize_possible_ml_model(inner_value)[1] + value["pymilo-bunch"] = new_value + else: + new_value = {} + for inner_key, inner_value in value.items(): + new_value[inner_key] = serialize_possible_ml_model(inner_value)[1] + ensemble_object.__dict__[key] = new_value + + elif isinstance(value, ndarray): + has_inner_model, result = serialize_models_in_ndarray(value) + if has_inner_model: + ensemble_object.__dict__[key] = result + + else: + ensemble_object.__dict__[key] = serialize_possible_ml_model(value)[1] + + self._transporters["GeneralDataStructureTransporter"].transport(ensemble_object, Command.SERIALIZE) + + return ensemble_object.__dict__ + + def deserialize(self, ensemble, is_inner_model=False): + """ + Return the associated sklearn ensemble model of the given ensemble. + + :param ensemble: given json string of a ensemble model to get deserialized to associated sklearn ensemble model + :type ensemble: obj + :param is_inner_model: determines whether it is an inner ensemble model of a super ml model + :type is_inner_model: boolean + :return: associated sklearn ensemble model + """ + data = None + if is_inner_model: + data = ensemble["data"] + else: + data = ensemble.data + + for transporter in self._transporters: + if transporter != "GeneralDataStructureTransporter": + self._transporters[transporter].transport( + ensemble, Command.DESERIALIZE, is_inner_model) + + for key, value in data.items(): + if isinstance(value, dict): + if check_str_in_iterable("pymiloed-data-structure", + value) and value["pymiloed-data-structure"] == "list of (str, estimator) tuples": + listed_tuples = value["pymiloed-data"] + list_of_tuples = [] + pt = PreprocessingTransporter() + for listed_tuple in listed_tuples: + name, serialized_model = listed_tuple + retrieved_model = pt.deserialize_pre_module(serialized_model) if pt.is_preprocessing_module( + serialized_model) else deserialize_possible_ml_model(serialized_model)[1] + list_of_tuples.append( + (name, retrieved_model) + ) + data[key] = list_of_tuples + + elif GeneralDataStructureTransporter().is_deserialized_ndarray(value): + has_inner_model, result = deserialize_models_in_ndarray(value) + if has_inner_model: + data[key] = result + + if isinstance(value, list): + for idx, item in enumerate(value): + has_ml_model, result = deserialize_possible_ml_model(item) + if has_ml_model: + value[idx] = result + + has_ml_model, result = deserialize_possible_ml_model(value) + if has_ml_model: + data[key] = result + + self._transporters["GeneralDataStructureTransporter"].transport(ensemble, Command.DESERIALIZE, is_inner_model) + + _type = None + raw_model = None + meta_learnings = ["StackingRegressor", "StackingClassifier", "VotingRegressor", "VotingClassifier"] + pipeline_models = ["Pipeline"] + if is_inner_model: + _type = ensemble["type"] + else: + _type = ensemble.type + + if _type in meta_learnings: + raw_model = self._supported_models[_type](estimators=data["estimators"]) + elif _type in pipeline_models: + raw_model = self._supported_models[_type](steps=data["steps"]) + else: + raw_model = self._supported_models[_type]() + + for item in data: + setattr(raw_model, item, data[item]) + return raw_model + +ensemble_chain = EnsembleModelChain(ENSEMBLE_CHAIN, SKLEARN_ENSEMBLE_TABLE) + def get_transporter(model): """ Get associated transporter for the given ML model. @@ -47,69 +182,12 @@ def get_transporter(model): """ if isinstance(model, str): if model.upper() == "ENSEMBLE": - return "ENSEMBLE", transport_ensemble - if is_ensemble(model): - return "ENSEMBLE", transport_ensemble + return "ENSEMBLE", ensemble_chain.transport + if ensemble_chain.is_supported(model): + return "ENSEMBLE", ensemble_chain.transport else: return get_concrete_transporter(model) - -def is_ensemble(model): - """ - Check if the input model is a sklearn's ensemble model. - - :param model: is a string name of a ensemble or a sklearn object of it - :type model: any object - :return: check result as bool - """ - if isinstance(model, str): - return model in SKLEARN_ENSEMBLE_TABLE - else: - return get_sklearn_type(model) in SKLEARN_ENSEMBLE_TABLE.keys() - - -def transport_ensemble(request, command, is_inner_model=False): - """ - Return the transported (Serialized or Deserialized) model. - - :param request: given ensemble to be transported - :type request: any object - :param command: command to specify whether the request should be serialized or deserialized - :type command: transporter.Command - :param is_inner_model: determines whether it is an inner model of a super ml model - :type is_inner_model: boolean - :return: the transported request as a json string or sklearn ensemble model - """ - if not is_inner_model: - _validate_input(request, command) - - if command == Command.SERIALIZE: - try: - return serialize_ensemble(request) - except Exception as e: - raise PymiloSerializationException( - { - 'error_type': SerializationErrorTypes.VALID_MODEL_INVALID_INTERNAL_STRUCTURE, - 'error': { - 'Exception': repr(e), - 'Traceback': format_exc(), - }, - 'object': request, - }) - - elif command == Command.DESERIALIZE: - try: - return deserialize_ensemble(request, is_inner_model) - except Exception as e: - raise PymiloDeserializationException( - { - 'error_type': SerializationErrorTypes.VALID_MODEL_INVALID_INTERNAL_STRUCTURE, - 'error': { - 'Exception': repr(e), - 'Traceback': format_exc()}, - 'object': request}) - - def serialize_possible_ml_model(possible_ml_model): """ Check whether the given object is a ML model and if it is, serialize it. @@ -131,7 +209,6 @@ def serialize_possible_ml_model(possible_ml_model): else: return False, possible_ml_model - def deserialize_possible_ml_model(possible_serialized_ml_model): """ Check whether the given object is previously serialized ML model and if it is, deserialize it back to the associated ML model. @@ -149,69 +226,6 @@ def deserialize_possible_ml_model(possible_serialized_ml_model): else: return False, possible_serialized_ml_model - -def serialize_ensemble(ensemble_object): - """ - Return the serialized json string of the given ensemble model. - - :param ensemble_object: given model to be get serialized - :type ensemble_object: any sklearn ensemble model - :return: the serialized json string of the given ensemble - """ - for transporter in ENSEMBLE_CHAIN: - if transporter != "GeneralDataStructureTransporter": - ENSEMBLE_CHAIN[transporter].transport( - ensemble_object, Command.SERIALIZE) - - for key, value in ensemble_object.__dict__.items(): - if isinstance(value, list): - has_inner_tuple_with_ml_model = False - pt = PreprocessingTransporter() - for idx, item in enumerate(value): - if isinstance(item, tuple): - listed_tuple = list(item) - for inner_idx, inner_item in enumerate(listed_tuple): - if pt.is_preprocessing_module(inner_item): - listed_tuple[inner_idx] = pt.serialize_pre_module(inner_item) - else: - has_inner_model, result = serialize_possible_ml_model(inner_item) - if has_inner_model: - has_inner_tuple_with_ml_model = True - listed_tuple[inner_idx] = result - value[idx] = listed_tuple - else: - value[idx] = serialize_possible_ml_model(item)[1] - if has_inner_tuple_with_ml_model: - ensemble_object.__dict__[key] = { - "pymiloed-data-structure": "list of (str, estimator) tuples", - "pymiloed-data": value, - } - - elif isinstance(value, dict): - if check_str_in_iterable("pymilo-bunch", value): - new_value = {} - for inner_key, inner_value in value["pymilo-bunch"].items(): - new_value[inner_key] = serialize_possible_ml_model(inner_value)[1] - value["pymilo-bunch"] = new_value - else: - new_value = {} - for inner_key, inner_value in value.items(): - new_value[inner_key] = serialize_possible_ml_model(inner_value)[1] - ensemble_object.__dict__[key] = new_value - - elif isinstance(value, ndarray): - has_inner_model, result = serialize_models_in_ndarray(value) - if has_inner_model: - ensemble_object.__dict__[key] = result - - else: - ensemble_object.__dict__[key] = serialize_possible_ml_model(value)[1] - - ENSEMBLE_CHAIN["GeneralDataStructureTransporter"].transport(ensemble_object, Command.SERIALIZE) - - return ensemble_object.__dict__ - - def serialize_models_in_ndarray(ndarray_instance): """ Serialize the ml models inside the given ndarray. @@ -254,7 +268,6 @@ def serialize_models_in_ndarray(ndarray_instance): 'pymiloed-data-structure': 'numpy.ndarray' } - def deserialize_models_in_ndarray(serialized_ndarray): """ Deserializes possible ML models within the given ndarray instance. @@ -297,110 +310,3 @@ def deserialize_models_in_ndarray(serialized_ndarray): dtype = literal_eval(dtype) return True, asarray(new_list, dtype=dtype) - - -def deserialize_ensemble(ensemble, is_inner_model=False): - """ - Return the associated sklearn ensemble model of the given ensemble. - - :param ensemble: given json string of a ensemble model to get deserialized to associated sklearn ensemble model - :type ensemble: obj - :param is_inner_model: determines whether it is an inner ensemble model of a super ml model - :type is_inner_model: boolean - :return: associated sklearn ensemble model - """ - data = None - if is_inner_model: - data = ensemble["data"] - else: - data = ensemble.data - - for transporter in ENSEMBLE_CHAIN: - if transporter != "GeneralDataStructureTransporter": - ENSEMBLE_CHAIN[transporter].transport( - ensemble, Command.DESERIALIZE, is_inner_model) - - for key, value in data.items(): - if isinstance(value, dict): - if check_str_in_iterable("pymiloed-data-structure", - value) and value["pymiloed-data-structure"] == "list of (str, estimator) tuples": - listed_tuples = value["pymiloed-data"] - list_of_tuples = [] - pt = PreprocessingTransporter() - for listed_tuple in listed_tuples: - name, serialized_model = listed_tuple - retrieved_model = pt.deserialize_pre_module(serialized_model) if pt.is_preprocessing_module( - serialized_model) else deserialize_possible_ml_model(serialized_model)[1] - list_of_tuples.append( - (name, retrieved_model) - ) - data[key] = list_of_tuples - - elif GeneralDataStructureTransporter().is_deserialized_ndarray(value): - has_inner_model, result = deserialize_models_in_ndarray(value) - if has_inner_model: - data[key] = result - - if isinstance(value, list): - for idx, item in enumerate(value): - has_ml_model, result = deserialize_possible_ml_model(item) - if has_ml_model: - value[idx] = result - - has_ml_model, result = deserialize_possible_ml_model(value) - if has_ml_model: - data[key] = result - - ENSEMBLE_CHAIN["GeneralDataStructureTransporter"].transport(ensemble, Command.DESERIALIZE, is_inner_model) - - _type = None - raw_model = None - meta_learnings = ["StackingRegressor", "StackingClassifier", "VotingRegressor", "VotingClassifier"] - pipeline_models = ["Pipeline"] - if is_inner_model: - _type = ensemble["type"] - else: - _type = ensemble.type - - if _type in meta_learnings: - raw_model = SKLEARN_ENSEMBLE_TABLE[_type](estimators=data["estimators"]) - elif _type in pipeline_models: - raw_model = SKLEARN_ENSEMBLE_TABLE[_type](steps=data["steps"]) - else: - raw_model = SKLEARN_ENSEMBLE_TABLE[_type]() - - for item in data: - setattr(raw_model, item, data[item]) - return raw_model - - -def _validate_input(model, command): - """ - Check if the provided inputs are valid in relation to each other. - - :param model: a sklearn ensemble model or a json string of it, serialized through the pymilo export - :type model: obj - :param command: command to specify whether the request should be serialized or deserialized - :type command: transporter.Command - :return: None - """ - if command == Command.SERIALIZE: - if is_ensemble(model): - return - else: - raise PymiloSerializationException( - { - 'error_type': SerializationErrorTypes.INVALID_MODEL, - 'object': model - } - ) - elif command == Command.DESERIALIZE: - if is_ensemble(model.type): - return - else: - raise PymiloDeserializationException( - { - 'error_type': DeserializationErrorTypes.INVALID_MODEL, - 'object': model - } - ) diff --git a/pymilo/chains/linear_model_chain.py b/pymilo/chains/linear_model_chain.py index 9a527bd8..f790f34d 100644 --- a/pymilo/chains/linear_model_chain.py +++ b/pymilo/chains/linear_model_chain.py @@ -1,19 +1,15 @@ # -*- coding: utf-8 -*- """PyMilo chain for linear models.""" -from ..transporters.transporter import Command -from ..transporters.general_data_structure_transporter import GeneralDataStructureTransporter +from .chain import AbstractChain from ..transporters.baseloss_transporter import BaseLossTransporter +from ..transporters.transporter import Command +from ..transporters.general_data_structure_transporter import GeneralDataStructureTransporter from ..transporters.lossfunction_transporter import LossFunctionTransporter from ..transporters.preprocessing_transporter import PreprocessingTransporter -from ..pymilo_param import SKLEARN_LINEAR_MODEL_TABLE from ..utils.util import get_sklearn_type, is_iterable - -from ..exceptions.serialize_exception import PymiloSerializationException, SerializationErrorTypes -from ..exceptions.deserialize_exception import PymiloDeserializationException, DeserializationErrorTypes -from traceback import format_exc - +from ..pymilo_param import SKLEARN_LINEAR_MODEL_TABLE LINEAR_MODEL_CHAIN = { "PreprocessingTransporter": PreprocessingTransporter(), @@ -23,19 +19,66 @@ } -def is_linear_model(model): - """ - Check if the input model is a sklearn's linear model. - - :param model: name of a linear model or a sklearn object of it - :type model: any object - :return: check result as bool - """ - if isinstance(model, str): - return model in SKLEARN_LINEAR_MODEL_TABLE - else: - return get_sklearn_type(model) in SKLEARN_LINEAR_MODEL_TABLE.keys() - +class LinearModelChain(AbstractChain): + """LinearModelChain developed to handle sklearn Linear ML model transportation.""" + + def serialize(self, linear_model_object): + """ + Return the serialized json string of the given linear model. + + :param linear_model_object: given model to be get serialized + :type linear_model_object: any sklearn linear model + :return: the serialized json string of the given linear model + """ + # first serializing the inner linear models... + for key in linear_model_object.__dict__: + if self.is_supported(linear_model_object.__dict__[key]): + linear_model_object.__dict__[key] = { + "pymilo-inner-model-data": self.transport(linear_model_object.__dict__[key], Command.SERIALIZE, True), + "pymilo-inner-model-type": get_sklearn_type(linear_model_object.__dict__[key]), + "pymilo-bypass": True + } + # now serializing non-linear model fields + for transporter in self._transporters: + self._transporters[transporter].transport( + linear_model_object, Command.SERIALIZE) + return linear_model_object.__dict__ + + def deserialize(self, linear_model, is_inner_model=False): + """ + Return the associated sklearn linear model of the given linear_model. + + :param linear_model: given json string of a linear model to get deserialized to associated sklearn linear model + :type linear_model: obj + :param is_inner_model: determines whether it is an inner model of a super ml model + :type is_inner_model: boolean + :return: associated sklearn linear model + """ + raw_model = None + data = None + if is_inner_model: + raw_model = self._supported_models[linear_model["type"]]() + data = linear_model["data"] + else: + raw_model = self._supported_models[linear_model.type]() + data = linear_model.data + # first deserializing the inner linear models(one depth inner linear + # models have been deserialized -> TODO full depth). + for key in data: + if is_deserialized_linear_model(data[key]): + data[key] = self.transport({ + "data": data[key]["pymilo-inner-model-data"], + "type": data[key]["pymilo-inner-model-type"] + }, Command.DESERIALIZE, is_inner_model=True) + # now deserializing non-linear models fields + for transporter in self._transporters: + self._transporters[transporter].transport( + linear_model, Command.DESERIALIZE, is_inner_model) + for item in data: + setattr(raw_model, item, data[item]) + return raw_model + +linear_chain = LinearModelChain(LINEAR_MODEL_CHAIN, SKLEARN_LINEAR_MODEL_TABLE) def is_deserialized_linear_model(content): """ @@ -48,134 +91,3 @@ def is_deserialized_linear_model(content): if not is_iterable(content): return False return "pymilo-inner-model-type" in content and "pymilo-inner-model-data" in content - - -def transport_linear_model(request, command, is_inner_model=False): - """ - Return the transported (Serialized or Deserialized) model. - - :param request: given model to be transported - :type request: any object - :param command: command to specify whether the request should be serialized or deserialized - :type command: transporter.Command - :param is_inner_model: determines whether it is an inner model of a super ml model - :type is_inner_model: boolean - :return: the transported request as a json string or sklearn model - """ - if not is_inner_model: - validate_input(request, command) - - if command == Command.SERIALIZE: - try: - return serialize_linear_model(request) - except Exception as e: - raise PymiloSerializationException( - { - 'error_type': SerializationErrorTypes.VALID_MODEL_INVALID_INTERNAL_STRUCTURE, - 'error': { - 'Exception': repr(e), - 'Traceback': format_exc()}, - 'object': request}) - - elif command == Command.DESERIALIZE: - try: - return deserialize_linear_model(request, is_inner_model) - except Exception as e: - raise PymiloDeserializationException( - { - 'error_type': SerializationErrorTypes.VALID_MODEL_INVALID_INTERNAL_STRUCTURE, - 'error': { - 'Exception': repr(e), - 'Traceback': format_exc()}, - 'object': request}) - - -def serialize_linear_model(linear_model_object): - """ - Return the serialized json string of the given linear model. - - :param linear_model_object: given model to be get serialized - :type linear_model_object: any sklearn linear model - :return: the serialized json string of the given linear model - """ - # first serializing the inner linear models... - for key in linear_model_object.__dict__: - if is_linear_model(linear_model_object.__dict__[key]): - linear_model_object.__dict__[key] = { - "pymilo-inner-model-data": transport_linear_model(linear_model_object.__dict__[key], Command.SERIALIZE, True), - "pymilo-inner-model-type": get_sklearn_type(linear_model_object.__dict__[key]), - "pymilo-bypass": True - } - # now serializing non-linear model fields - for transporter in LINEAR_MODEL_CHAIN: - LINEAR_MODEL_CHAIN[transporter].transport( - linear_model_object, Command.SERIALIZE) - return linear_model_object.__dict__ - - -def deserialize_linear_model(linear_model, is_inner_model): - """ - Return the associated sklearn linear model of the given linear_model. - - :param linear_model: given json string of a linear model to get deserialized to associated sklearn linear model - :type linear_model: obj - :param is_inner_model: determines whether it is an inner model of a super ml model - :type is_inner_model: boolean - :return: associated sklearn linear model - """ - raw_model = None - data = None - if is_inner_model: - raw_model = SKLEARN_LINEAR_MODEL_TABLE[linear_model["type"]]() - data = linear_model["data"] - else: - raw_model = SKLEARN_LINEAR_MODEL_TABLE[linear_model.type]() - data = linear_model.data - # first deserializing the inner linear models(one depth inner linear - # models have been deserialized -> TODO full depth). - for key in data: - if is_deserialized_linear_model(data[key]): - data[key] = transport_linear_model({ - "data": data[key]["pymilo-inner-model-data"], - "type": data[key]["pymilo-inner-model-type"] - }, Command.DESERIALIZE, is_inner_model=True) - # now deserializing non-linear models fields - for transporter in LINEAR_MODEL_CHAIN: - LINEAR_MODEL_CHAIN[transporter].transport( - linear_model, Command.DESERIALIZE, is_inner_model) - for item in data: - setattr(raw_model, item, data[item]) - return raw_model - - -def validate_input(model, command): - """ - Check if the provided inputs are valid in relation to each other. - - :param model: a sklearn linear model or a json string of it, serialized through the pymilo export. - :type model: obj - :param command: command to specify whether the request should be serialized or deserialized - :type command: transporter.Command - :return: None - """ - if command == Command.SERIALIZE: - if is_linear_model(model): - return - else: - raise PymiloSerializationException( - { - 'error_type': SerializationErrorTypes.INVALID_MODEL, - 'object': model - } - ) - elif command == Command.DESERIALIZE: - model_type = model.type - if is_linear_model(model_type): - return - else: - raise PymiloDeserializationException( - { - 'error_type': DeserializationErrorTypes.INVALID_MODEL, - 'object': model - } - ) diff --git a/pymilo/chains/naive_bayes_chain.py b/pymilo/chains/naive_bayes_chain.py index a31e3644..79fea4f9 100644 --- a/pymilo/chains/naive_bayes_chain.py +++ b/pymilo/chains/naive_bayes_chain.py @@ -1,148 +1,15 @@ # -*- coding: utf-8 -*- -"""PyMilo chain for naive bayes models.""" -from ..transporters.transporter import Command +"""PyMilo chain for Naive Bayes models.""" +from ..chains.chain import AbstractChain +from ..pymilo_param import SKLEARN_NAIVE_BAYES_TABLE from ..transporters.general_data_structure_transporter import GeneralDataStructureTransporter from ..transporters.preprocessing_transporter import PreprocessingTransporter -from ..pymilo_param import SKLEARN_NAIVE_BAYES_TABLE -from ..exceptions.serialize_exception import PymiloSerializationException, SerializationErrorTypes -from ..exceptions.deserialize_exception import PymiloDeserializationException, DeserializationErrorTypes - -from ..utils.util import get_sklearn_type - -from traceback import format_exc - -NAIVE_BAYES_CHAIN = { - "PreprocessingTransporter": PreprocessingTransporter(), - "GeneralDataStructureTransporter": GeneralDataStructureTransporter(), -} - - -def is_naive_bayes(model): - """ - Check if the input model is a sklearn's naive bayes model. - - :param model: is a string name of a naive bayes or a sklearn object of it - :type model: any object - :return: check result as bool - """ - if isinstance(model, str): - return model in SKLEARN_NAIVE_BAYES_TABLE - else: - return get_sklearn_type(model) in SKLEARN_NAIVE_BAYES_TABLE.keys() - - -def transport_naive_bayes(request, command, is_inner_model=False): - """ - Return the transported (Serialized or Deserialized) model. - - :param request: given naive bayes to be transported - :type request: any object - :param command: command to specify whether the request should be serialized or deserialized - :type command: transporter.Command - :param is_inner_model: determines whether it is an inner model of a super ml model - :type is_inner_model: boolean - :return: the transported request as a json string or sklearn naive bayes model - """ - if not is_inner_model: - _validate_input(request, command) - - if command == Command.SERIALIZE: - try: - return serialize_naive_bayes(request) - except Exception as e: - raise PymiloSerializationException( - { - 'error_type': SerializationErrorTypes.VALID_MODEL_INVALID_INTERNAL_STRUCTURE, - 'error': { - 'Exception': repr(e), - 'Traceback': format_exc(), - }, - 'object': request, - }) - - elif command == Command.DESERIALIZE: - try: - return deserialize_naive_bayes(request, is_inner_model) - except Exception as e: - raise PymiloDeserializationException( - { - 'error_type': SerializationErrorTypes.VALID_MODEL_INVALID_INTERNAL_STRUCTURE, - 'error': { - 'Exception': repr(e), - 'Traceback': format_exc()}, - 'object': request}) - - -def serialize_naive_bayes(naive_bayes_object): - """ - Return the serialized json string of the given naive bayes model. - - :param naive_bayes_object: given model to be get serialized - :type naive_bayes_object: any sklearn naive bayes model - :return: the serialized json string of the given naive bayes - """ - for transporter in NAIVE_BAYES_CHAIN: - NAIVE_BAYES_CHAIN[transporter].transport( - naive_bayes_object, Command.SERIALIZE) - return naive_bayes_object.__dict__ - - -def deserialize_naive_bayes(naive_bayes, is_inner_model=False): - """ - Return the associated sklearn naive bayes model of the given naive bayes. - - :param naive bayes: given json string of a naive bayes model to get deserialized to associated sklearn naive bayes model - :type naive bayes: obj - :param is_inner_model: determines whether it is an inner model of a super ml model - :type is_inner_model: boolean - :return: associated sklearn naive bayes model - """ - raw_model = None - data = None - if is_inner_model: - raw_model = SKLEARN_NAIVE_BAYES_TABLE[naive_bayes["type"]]() - data = naive_bayes["data"] - else: - raw_model = SKLEARN_NAIVE_BAYES_TABLE[naive_bayes.type]() - data = naive_bayes.data - - for transporter in NAIVE_BAYES_CHAIN: - NAIVE_BAYES_CHAIN[transporter].transport( - naive_bayes, Command.DESERIALIZE, is_inner_model) - for item in data: - setattr(raw_model, item, data[item]) - return raw_model - - -def _validate_input(model, command): - """ - Check if the provided inputs are valid in relation to each other. - - :param model: a sklearn naive bayes model or a json string of it, serialized through the pymilo export. - :type model: obj - :param command: command to specify whether the request should be serialized or deserialized - :type command: transporter.Command - :return: None - """ - if command == Command.SERIALIZE: - if is_naive_bayes(model): - return - else: - raise PymiloSerializationException( - { - 'error_type': SerializationErrorTypes.INVALID_MODEL, - 'object': model - } - ) - elif command == Command.DESERIALIZE: - if is_naive_bayes(model.type): - return - else: - raise PymiloDeserializationException( - { - 'error_type': DeserializationErrorTypes.INVALID_MODEL, - 'object': model - } - ) +naive_bayes_chain = AbstractChain( + { + "PreprocessingTransporter": PreprocessingTransporter(), + "GeneralDataStructureTransporter": GeneralDataStructureTransporter(), + }, + SKLEARN_NAIVE_BAYES_TABLE, +) diff --git a/pymilo/chains/neighbours_chain.py b/pymilo/chains/neighbours_chain.py index f9259383..f53be6a7 100644 --- a/pymilo/chains/neighbours_chain.py +++ b/pymilo/chains/neighbours_chain.py @@ -1,150 +1,17 @@ # -*- coding: utf-8 -*- -"""PyMilo chain for neighbors models.""" -from ..transporters.transporter import Command +"""PyMilo chain for Neighbors models.""" +from ..chains.chain import AbstractChain +from ..pymilo_param import SKLEARN_NEIGHBORS_TABLE from ..transporters.general_data_structure_transporter import GeneralDataStructureTransporter from ..transporters.neighbors_tree_transporter import NeighborsTreeTransporter from ..transporters.preprocessing_transporter import PreprocessingTransporter -from ..pymilo_param import SKLEARN_NEIGHBORS_TABLE -from ..exceptions.serialize_exception import PymiloSerializationException, SerializationErrorTypes -from ..exceptions.deserialize_exception import PymiloDeserializationException, DeserializationErrorTypes - -from ..utils.util import get_sklearn_type - -from traceback import format_exc - -NEIGHBORS_CHAIN = { - "PreprocessingTransporter": PreprocessingTransporter(), - "GeneralDataStructureTransporter": GeneralDataStructureTransporter(), - "NeighborsTreeTransporter": NeighborsTreeTransporter(), -} - - -def is_neighbors(model): - """ - Check if the input model is a sklearn's neighbors model. - - :param model: is a string name of a neighbor or a sklearn object of it - :type model: any object - :return: check result as bool - """ - if isinstance(model, str): - return model in SKLEARN_NEIGHBORS_TABLE - else: - return get_sklearn_type(model) in SKLEARN_NEIGHBORS_TABLE.keys() - - -def transport_neighbor(request, command, is_inner_model=False): - """ - Return the transported (Serialized or Deserialized) model. - - :param request: given neighbor to be transported - :type request: any object - :param command: command to specify whether the request should be serialized or deserialized - :type command: transporter.Command - :param is_inner_model: determines whether it is an inner model of a super ml model - :type is_inner_model: boolean - :return: the transported request as a json string or sklearn neighbors model - """ - if not is_inner_model: - _validate_input(request, command) - - if command == Command.SERIALIZE: - try: - return serialize_neighbor(request) - except Exception as e: - raise PymiloSerializationException( - { - 'error_type': SerializationErrorTypes.VALID_MODEL_INVALID_INTERNAL_STRUCTURE, - 'error': { - 'Exception': repr(e), - 'Traceback': format_exc(), - }, - 'object': request, - }) - - elif command == Command.DESERIALIZE: - try: - return deserialize_neighbor(request, is_inner_model) - except Exception as e: - raise PymiloDeserializationException( - { - 'error_type': SerializationErrorTypes.VALID_MODEL_INVALID_INTERNAL_STRUCTURE, - 'error': { - 'Exception': repr(e), - 'Traceback': format_exc()}, - 'object': request}) - - -def serialize_neighbor(neighbor_object): - """ - Return the serialized json string of the given neighbor model. - - :param neighbor_object: given model to be get serialized - :type neighbor_object: any sklearn neighbor model - :return: the serialized json string of the given neighbor - """ - for transporter in NEIGHBORS_CHAIN: - NEIGHBORS_CHAIN[transporter].transport( - neighbor_object, Command.SERIALIZE) - return neighbor_object.__dict__ - - -def deserialize_neighbor(neighbor, is_inner_model=False): - """ - Return the associated sklearn neighbor model of the given neighbor. - - :param neighbor: given json string of a neighbor model to get deserialized to associated sklearn neighbors model - :type neighbor: obj - :param is_inner_model: determines whether it is an inner model of a super ml model - :type is_inner_model: boolean - :return: associated sklearn neighbor model - """ - raw_model = None - data = None - if is_inner_model: - raw_model = SKLEARN_NEIGHBORS_TABLE[neighbor["type"]]() - data = neighbor["data"] - else: - raw_model = SKLEARN_NEIGHBORS_TABLE[neighbor.type]() - data = neighbor.data - - for transporter in NEIGHBORS_CHAIN: - NEIGHBORS_CHAIN[transporter].transport( - neighbor, Command.DESERIALIZE, is_inner_model) - for item in data: - setattr(raw_model, item, data[item]) - return raw_model - - -def _validate_input(model, command): - """ - Check if the provided inputs are valid in relation to each other. - - :param model: a sklearn neighbor model or a json string of it, serialized through the pymilo export. - :type model: obj - :param command: command to specify whether the request should be serialized or deserialized - :type command: transporter.Command - :return: None - """ - if command == Command.SERIALIZE: - if is_neighbors(model): - return - else: - raise PymiloSerializationException( - { - 'error_type': SerializationErrorTypes.INVALID_MODEL, - 'object': model - } - ) - elif command == Command.DESERIALIZE: - if is_neighbors(model.type): - return - else: - raise PymiloDeserializationException( - { - 'error_type': DeserializationErrorTypes.INVALID_MODEL, - 'object': model - } - ) +neighbors_chain = AbstractChain( + { + "PreprocessingTransporter": PreprocessingTransporter(), + "GeneralDataStructureTransporter": GeneralDataStructureTransporter(), + "NeighborsTreeTransporter": NeighborsTreeTransporter(), + }, + SKLEARN_NEIGHBORS_TABLE, +) diff --git a/pymilo/chains/neural_network_chain.py b/pymilo/chains/neural_network_chain.py index 2b7ea41c..a72c1820 100644 --- a/pymilo/chains/neural_network_chain.py +++ b/pymilo/chains/neural_network_chain.py @@ -1,156 +1,21 @@ # -*- coding: utf-8 -*- -"""PyMilo chain for neural network models.""" -from ..transporters.transporter import Command +"""PyMilo chain for Neural Network models.""" +from ..chains.chain import AbstractChain +from ..pymilo_param import SKLEARN_NEURAL_NETWORK_TABLE +from ..transporters.adamoptimizer_transporter import AdamOptimizerTransporter from ..transporters.general_data_structure_transporter import GeneralDataStructureTransporter +from ..transporters.preprocessing_transporter import PreprocessingTransporter from ..transporters.randomstate_transporter import RandomStateTransporter from ..transporters.sgdoptimizer_transporter import SGDOptimizerTransporter -from ..transporters.adamoptimizer_transporter import AdamOptimizerTransporter -from ..transporters.preprocessing_transporter import PreprocessingTransporter - -from ..pymilo_param import SKLEARN_NEURAL_NETWORK_TABLE - -from ..exceptions.serialize_exception import PymiloSerializationException, SerializationErrorTypes -from ..exceptions.deserialize_exception import PymiloDeserializationException, DeserializationErrorTypes - -from ..utils.util import get_sklearn_type - -from traceback import format_exc - - -NEURAL_NETWORK_CHAIN = { - "PreprocessingTransporter": PreprocessingTransporter(), - "GeneralDataStructureTransporter": GeneralDataStructureTransporter(), - "RandomStateTransporter": RandomStateTransporter(), - "SGDOptimizer": SGDOptimizerTransporter(), - "AdamOptimizerTransporter": AdamOptimizerTransporter(), -} - - -def is_neural_network(model): - """ - Check if the input model is a sklearn's neural network. - - :param model: is a string name of a neural network or a sklearn object of it - :type model: any object - :return: check result as bool - """ - if isinstance(model, str): - return model in SKLEARN_NEURAL_NETWORK_TABLE - else: - return get_sklearn_type(model) in SKLEARN_NEURAL_NETWORK_TABLE.keys() - - -def transport_neural_network(request, command, is_inner_model=False): - """ - Return the transported (Serialized or Deserialized) model. - - :param request: given neural network model to be transported - :type request: any object - :param command: command to specify whether the request should be serialized or deserialized - :type command: transporter.Command - :param is_inner_model: determines whether it is an inner model of a super ml model - :type is_inner_model: boolean - :return: the transported request as a json string or sklearn neural network model - """ - if not is_inner_model: - _validate_input(request, command) - - if command == Command.SERIALIZE: - try: - return serialize_neural_network(request) - except Exception as e: - raise PymiloSerializationException( - { - 'error_type': SerializationErrorTypes.VALID_MODEL_INVALID_INTERNAL_STRUCTURE, - 'error': { - 'Exception': repr(e), - 'Traceback': format_exc(), - }, - 'object': request, - }) - - elif command == Command.DESERIALIZE: - try: - return deserialize_neural_network(request, is_inner_model) - except Exception as e: - raise PymiloDeserializationException( - { - 'error_type': SerializationErrorTypes.VALID_MODEL_INVALID_INTERNAL_STRUCTURE, - 'error': { - 'Exception': repr(e), - 'Traceback': format_exc()}, - 'object': request}) - - -def serialize_neural_network(neural_network_object): - """ - Return the serialized json string of the given neural network model. - - :param neural_network_object: given model to be get serialized - :type neural_network_object: any sklearn neural network model - :return: the serialized json string of the given neural network model - """ - for transporter in NEURAL_NETWORK_CHAIN: - NEURAL_NETWORK_CHAIN[transporter].transport( - neural_network_object, Command.SERIALIZE) - return neural_network_object.__dict__ - - -def deserialize_neural_network(neural_network, is_inner_model=False): - """ - Return the associated sklearn neural network model of the given neural_network. - - :param neural_network: given json string of a neural network model to get deserialized to associated sklearn NN model - :type neural_network: obj - :param is_inner_model: determines whether it is an inner model of a super ml model - :type is_inner_model: boolean - :return: associated sklearn NN model - """ - raw_model = None - data = None - if is_inner_model: - raw_model = SKLEARN_NEURAL_NETWORK_TABLE[neural_network["type"]]() - data = neural_network["data"] - else: - raw_model = SKLEARN_NEURAL_NETWORK_TABLE[neural_network.type]() - data = neural_network.data - - for transporter in NEURAL_NETWORK_CHAIN: - NEURAL_NETWORK_CHAIN[transporter].transport( - neural_network, Command.DESERIALIZE, is_inner_model) - for item in data: - setattr(raw_model, item, data[item]) - return raw_model - - -def _validate_input(model, command): - """ - Check if the provided inputs are valid in relation to each other. - :param model: a sklearn neural network model or a json string of it, serialized through the pymilo export. - :type model: obj - :param command: command to specify whether the request should be serialized or deserialized - :type command: transporter.Command - :return: None - """ - if command == Command.SERIALIZE: - if is_neural_network(model): - return - else: - raise PymiloSerializationException( - { - 'error_type': SerializationErrorTypes.INVALID_MODEL, - 'object': model - } - ) - elif command == Command.DESERIALIZE: - if is_neural_network(model.type): - return - else: - raise PymiloDeserializationException( - { - 'error_type': DeserializationErrorTypes.INVALID_MODEL, - 'object': model - } - ) +neural_network_chain = AbstractChain( + { + "PreprocessingTransporter": PreprocessingTransporter(), + "GeneralDataStructureTransporter": GeneralDataStructureTransporter(), + "RandomStateTransporter": RandomStateTransporter(), + "SGDOptimizer": SGDOptimizerTransporter(), + "AdamOptimizerTransporter": AdamOptimizerTransporter(), + }, + SKLEARN_NEURAL_NETWORK_TABLE, +) diff --git a/pymilo/chains/svm_chain.py b/pymilo/chains/svm_chain.py index 87d0acf2..b6a5b867 100644 --- a/pymilo/chains/svm_chain.py +++ b/pymilo/chains/svm_chain.py @@ -1,150 +1,17 @@ # -*- coding: utf-8 -*- -"""PyMilo chain for svm models.""" -from ..transporters.transporter import Command +"""PyMilo chain for SVM models.""" +from ..chains.chain import AbstractChain +from ..pymilo_param import SKLEARN_SVM_TABLE +from ..transporters.preprocessing_transporter import PreprocessingTransporter from ..transporters.general_data_structure_transporter import GeneralDataStructureTransporter from ..transporters.randomstate_transporter import RandomStateTransporter -from ..transporters.preprocessing_transporter import PreprocessingTransporter - -from ..pymilo_param import SKLEARN_SVM_TABLE -from ..exceptions.serialize_exception import PymiloSerializationException, SerializationErrorTypes -from ..exceptions.deserialize_exception import PymiloDeserializationException, DeserializationErrorTypes - -from ..utils.util import get_sklearn_type - -from traceback import format_exc - -SVM_CHAIN = { - "PreprocessingTransporter": PreprocessingTransporter(), - "GeneralDataStructureTransporter": GeneralDataStructureTransporter(), - "RandomStateTransporter": RandomStateTransporter(), -} - - -def is_svm(model): - """ - Check if the input model is a sklearn's svm model. - - :param model: is a string name of a svm or a sklearn object of it - :type model: any object - :return: check result as bool - """ - if isinstance(model, str): - return model in SKLEARN_SVM_TABLE - else: - return get_sklearn_type(model) in SKLEARN_SVM_TABLE.keys() - - -def transport_svm(request, command, is_inner_model=False): - """ - Return the transported (Serialized or Deserialized) model. - - :param request: given svm to be transported - :type request: any object - :param command: command to specify whether the request should be serialized or deserialized - :type command: transporter.Command - :param is_inner_model: determines whether it is an inner model of a super ml model - :type is_inner_model: boolean - :return: the transported request as a json string or sklearn svm model - """ - if not is_inner_model: - _validate_input(request, command) - - if command == Command.SERIALIZE: - try: - return serialize_svm(request) - except Exception as e: - raise PymiloSerializationException( - { - 'error_type': SerializationErrorTypes.VALID_MODEL_INVALID_INTERNAL_STRUCTURE, - 'error': { - 'Exception': repr(e), - 'Traceback': format_exc(), - }, - 'object': request, - }) - - elif command == Command.DESERIALIZE: - try: - return deserialize_svm(request, is_inner_model) - except Exception as e: - raise PymiloDeserializationException( - { - 'error_type': SerializationErrorTypes.VALID_MODEL_INVALID_INTERNAL_STRUCTURE, - 'error': { - 'Exception': repr(e), - 'Traceback': format_exc()}, - 'object': request}) - - -def serialize_svm(svm_object): - """ - Return the serialized json string of the given svm model. - - :param svm_object: given model to be get serialized - :type svm_object: any sklearn svm model - :return: the serialized json string of the given svm - """ - for transporter in SVM_CHAIN: - SVM_CHAIN[transporter].transport( - svm_object, Command.SERIALIZE) - return svm_object.__dict__ - - -def deserialize_svm(svm, is_inner_model=False): - """ - Return the associated sklearn svm model of the given svm. - - :param svm: given json string of a svm model to get deserialized to associated sklearn svm model - :type svm: obj - :param is_inner_model: determines whether it is an inner model of a super ml model - :type is_inner_model: boolean - :return: associated sklearn svm model - """ - raw_model = None - data = None - if is_inner_model: - raw_model = SKLEARN_SVM_TABLE[svm["type"]]() - data = svm["data"] - else: - raw_model = SKLEARN_SVM_TABLE[svm.type]() - data = svm.data - - for transporter in SVM_CHAIN: - SVM_CHAIN[transporter].transport( - svm, Command.DESERIALIZE, is_inner_model) - for item in data: - setattr(raw_model, item, data[item]) - return raw_model - - -def _validate_input(model, command): - """ - Check if the provided inputs are valid in relation to each other. - :param model: a sklearn svm model or a json string of it, serialized through the pymilo export. - :type model: obj - :param command: command to specify whether the request should be serialized or deserialized - :type command: transporter.Command - :return: None - """ - if command == Command.SERIALIZE: - if is_svm(model): - return - else: - raise PymiloSerializationException( - { - 'error_type': SerializationErrorTypes.INVALID_MODEL, - 'object': model - } - ) - elif command == Command.DESERIALIZE: - if is_svm(model.type): - return - else: - raise PymiloDeserializationException( - { - 'error_type': DeserializationErrorTypes.INVALID_MODEL, - 'object': model - } - ) +svm_chain = AbstractChain( + { + "PreprocessingTransporter": PreprocessingTransporter(), + "GeneralDataStructureTransporter": GeneralDataStructureTransporter(), + "RandomStateTransporter": RandomStateTransporter(), + }, + SKLEARN_SVM_TABLE, +) diff --git a/pymilo/chains/util.py b/pymilo/chains/util.py index 94239929..4f10bbad 100644 --- a/pymilo/chains/util.py +++ b/pymilo/chains/util.py @@ -1,23 +1,25 @@ # -*- coding: utf-8 -*- """useful utilities for chains.""" -from .linear_model_chain import transport_linear_model, is_linear_model -from .neural_network_chain import transport_neural_network, is_neural_network -from .decision_tree_chain import transport_decision_tree, is_decision_tree -from .clustering_chain import transport_clusterer, is_clusterer -from .naive_bayes_chain import transport_naive_bayes, is_naive_bayes -from .svm_chain import transport_svm, is_svm -from .neighbours_chain import transport_neighbor, is_neighbors -from .cross_decomposition_chain import transport_cross_decomposition, is_cross_decomposition + +from .linear_model_chain import linear_chain +from .neural_network_chain import neural_network_chain +from .decision_tree_chain import decision_trees_chain +from .clustering_chain import clustering_chain +from .naive_bayes_chain import naive_bayes_chain +from .svm_chain import svm_chain +from .neighbours_chain import neighbors_chain +from .cross_decomposition_chain import cross_decomposition_chain + MODEL_TYPE_TRANSPORTER = { - "LINEAR_MODEL": transport_linear_model, - "NEURAL_NETWORK": transport_neural_network, - "DECISION_TREE": transport_decision_tree, - "CLUSTERING": transport_clusterer, - "NAIVE_BAYES": transport_naive_bayes, - "SVM": transport_svm, - "NEIGHBORS": transport_neighbor, - "CROSS_DECOMPOSITION": transport_cross_decomposition, + "LINEAR_MODEL": linear_chain.transport, + "NEURAL_NETWORK": neural_network_chain.transport, + "DECISION_TREE": decision_trees_chain.transport, + "CLUSTERING": clustering_chain.transport, + "NAIVE_BAYES": naive_bayes_chain.transport, + "SVM": svm_chain.transport, + "NEIGHBORS": neighbors_chain.transport, + "CROSS_DECOMPOSITION": cross_decomposition_chain.transport, } @@ -34,21 +36,21 @@ def get_concrete_transporter(model): if upper_model in MODEL_TYPE_TRANSPORTER.keys(): return upper_model, MODEL_TYPE_TRANSPORTER[upper_model] - if is_linear_model(model): - return "LINEAR_MODEL", transport_linear_model - elif is_neural_network(model): - return "NEURAL_NETWORK", transport_neural_network - elif is_decision_tree(model): - return "DECISION_TREE", transport_decision_tree - elif is_clusterer(model): - return "CLUSTERING", transport_clusterer - elif is_naive_bayes(model): - return "NAIVE_BAYES", transport_naive_bayes - elif is_svm(model): - return "SVM", transport_svm - elif is_neighbors(model): - return "NEIGHBORS", transport_neighbor - elif is_cross_decomposition(model): - return "CROSS_DECOMPOSITION", transport_cross_decomposition + if linear_chain.is_supported(model): + return "LINEAR_MODEL", linear_chain.transport + elif neural_network_chain.is_supported(model): + return "NEURAL_NETWORK", neural_network_chain.transport + elif decision_trees_chain.is_supported(model): + return "DECISION_TREE", decision_trees_chain.transport + elif clustering_chain.is_supported(model): + return "CLUSTERING", clustering_chain.transport + elif naive_bayes_chain.is_supported(model): + return "NAIVE_BAYES", naive_bayes_chain.transport + elif svm_chain.is_supported(model): + return "SVM", svm_chain.transport + elif neighbors_chain.is_supported(model): + return "NEIGHBORS", neighbors_chain.transport + elif cross_decomposition_chain.is_supported(model): + return "CROSS_DECOMPOSITION", cross_decomposition_chain.transport else: return None, None diff --git a/pymilo/utils/test_pymilo.py b/pymilo/utils/test_pymilo.py index cff404ba..9df0800f 100644 --- a/pymilo/utils/test_pymilo.py +++ b/pymilo/utils/test_pymilo.py @@ -6,7 +6,8 @@ from ..pymilo_obj import Export from ..pymilo_obj import Import -from ..pymilo_func import get_transporter, compare_model_outputs +from ..chains.ensemble_chain import get_transporter +from ..pymilo_func import compare_model_outputs from ..pymilo_param import EXPORTED_MODELS_PATH from sklearn.metrics import mean_squared_error, r2_score diff --git a/tests/test_exceptions/export_exceptions.py b/tests/test_exceptions/export_exceptions.py index 30ec0b42..ca722635 100644 --- a/tests/test_exceptions/export_exceptions.py +++ b/tests/test_exceptions/export_exceptions.py @@ -2,7 +2,7 @@ # VALID_MODEL_INVALID_INTERNAL_STRUCTURE = 2 -> tested. from pymilo.utils.data_exporter import prepare_simple_regression_datasets from pymilo.utils.test_pymilo import pymilo_regression_test -from pymilo.chains.neural_network_chain import transport_neural_network +from pymilo.chains.neural_network_chain import neural_network_chain from pymilo.transporters.transporter import Command from sklearn.linear_model import LinearRegression @@ -60,7 +60,7 @@ def valid_model_irrelevant_chain(print_output = True): # Train the model using the training sets linear_regression.fit(x_train, y_train) try: - transport_neural_network(linear_regression, Command.SERIALIZE) + neural_network_chain.transport(linear_regression, Command.SERIALIZE) return False except Exception as e: if print_output: print("An Exception occured\n", e)