Skip to content

Commit

Permalink
Merge pull request #103 from nidhaloff/feature/cnn
Browse files Browse the repository at this point in the history
Feature/cnn
  • Loading branch information
nidhaloff authored Sep 17, 2021
2 parents 65c89c6 + 3df059a commit fd6e7a9
Show file tree
Hide file tree
Showing 11 changed files with 373 additions and 61 deletions.
8 changes: 8 additions & 0 deletions examples/auto-ml/igel.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@


# Example configuration for igel's auto-ml commands (e.g. `igel auto-train`).
# `model.type` names the task to look up among the supported autokeras models,
# and `model.arguments` are forwarded as keyword arguments to that model.
model:
    type: ImageClassification
    arguments:
        max_trials: 1
# NOTE(review): `target` is assumed to sit at the top level because the
# trainer reads it with `configs.get("target")` - confirm the intended nesting.
target:
    - label
77 changes: 70 additions & 7 deletions igel/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,19 @@
import subprocess
from pathlib import Path

import igel
import click
import igel
import pandas as pd
from igel import Igel, metrics_dict
from igel.cnn import IgelCNN
from igel.constants import Constants
from igel.servers import fastapi_server
from igel.utils import print_models_overview, show_model_info, tableize

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Shared click context settings: every command accepts both -h and --help.
CONTEXT_SETTINGS = {"help_option_names": ["-h", "--help"]}


@click.group()
def cli():
"""
Expand Down Expand Up @@ -71,6 +73,23 @@ def fit(data_path: str, yaml_path: str) -> None:
Igel(cmd="fit", data_path=data_path, yaml_path=yaml_path)


@cli.command(context_settings=CONTEXT_SETTINGS)
@click.option(
    "--data_path",
    "-dp",
    required=True,
    help="Path to your training dataset",
)
@click.option(
    "--yaml_path",
    "-yml",
    required=True,
    help="Path to your igel configuration file (yaml or json file)",
)
def auto_train(data_path: str, yaml_path: str) -> None:
    """
    Automatically search for and train a suitable deep neural network for a task
    """
    # IgelCNN runs the requested command from inside its constructor, so the
    # instance itself does not need to be kept.
    cli_args = {
        "cmd": "train",
        "data_path": data_path,
        "yaml_path": yaml_path,
    }
    IgelCNN(**cli_args)


@cli.command(context_settings=CONTEXT_SETTINGS)
@click.option(
"--data_path", "-dp", required=True, help="Path to your evaluation dataset"
Expand All @@ -82,6 +101,17 @@ def evaluate(data_path: str) -> None:
Igel(cmd="evaluate", data_path=data_path)


@cli.command(context_settings=CONTEXT_SETTINGS)
@click.option(
    "--data_path",
    "-dp",
    required=True,
    help="Path to your evaluation dataset",
)
def auto_evaluate(data_path: str) -> None:
    """
    Evaluate the performance of an existing machine learning model
    """
    # IgelCNN dispatches to the method named by `cmd` inside its constructor.
    # NOTE(review): the IgelCNN class in igel/cnn/cnn.py appears to define
    # `train` but no `evaluate` method - confirm this command can succeed.
    cli_args = {"cmd": "evaluate", "data_path": data_path}
    IgelCNN(**cli_args)


@cli.command(context_settings=CONTEXT_SETTINGS)
@click.option("--data_path", "-dp", required=True, help="Path to your dataset")
def predict(data_path: str) -> None:
Expand All @@ -90,7 +120,16 @@ def predict(data_path: str) -> None:
"""
Igel(cmd="predict", data_path=data_path)



@cli.command(context_settings=CONTEXT_SETTINGS)
@click.option(
    "--data_path",
    "-dp",
    required=True,
    help="Path to your dataset",
)
def auto_predict(data_path: str) -> None:
    """
    Use an existing machine learning model to generate predictions
    """
    # IgelCNN dispatches to the method named by `cmd` inside its constructor.
    # NOTE(review): the IgelCNN class in igel/cnn/cnn.py appears to define
    # `train` but no `predict` method - confirm this command can succeed.
    cli_args = {"cmd": "predict", "data_path": data_path}
    IgelCNN(**cli_args)


@cli.command(context_settings=CONTEXT_SETTINGS)
@click.option(
"--data_paths",
Expand All @@ -115,7 +154,32 @@ def experiment(data_paths: str, yaml_path: str) -> None:
Igel(cmd="evaluate", data_path=eval_data_path)
Igel(cmd="predict", data_path=pred_data_path)



@cli.command(context_settings=CONTEXT_SETTINGS)
@click.option(
    "--data_paths",
    "-DP",
    required=True,
    help="Path to your datasets as string separated by space",
)
@click.option(
    "--yaml_path",
    "-yml",
    required=True,
    help="Path to your igel configuration file (yaml or json file)",
)
def auto_experiment(data_paths: str, yaml_path: str) -> None:
    """
    train, evaluate and use pre-trained model for predictions in one command
    """
    # `data_paths` must contain exactly three whitespace-separated paths, in
    # the order: training data, evaluation data, prediction data.
    # str.split() with no argument collapses runs of whitespace, so extra
    # spaces or tabs between the paths no longer produce empty entries that
    # break the three-way unpacking below (which raises ValueError when the
    # number of paths is not exactly three).
    train_data_path, eval_data_path, pred_data_path = data_paths.split()

    # Each IgelCNN construction runs its command immediately.
    IgelCNN(cmd="train", data_path=train_data_path, yaml_path=yaml_path)
    IgelCNN(cmd="evaluate", data_path=eval_data_path)
    IgelCNN(cmd="predict", data_path=pred_data_path)


@cli.command(context_settings=CONTEXT_SETTINGS)
@click.option(
"--model_results_dir",
Expand Down Expand Up @@ -212,20 +276,20 @@ def gui():
logger.info("running igel UI...")
subprocess.check_call("npm start", shell=True)


@cli.command(context_settings=CONTEXT_SETTINGS)
def help():
    """get help about how to use igel"""
    # Build a click context for the top-level group and print its help text.
    ctx = click.Context(cli)
    with ctx:
        usage_text = cli.get_help(ctx)
        click.echo(usage_text)


@cli.command(context_settings=CONTEXT_SETTINGS)
def version():
    """get the version of igel installed on your machine"""
    # The version string is read from the imported igel package.
    installed = igel.__version__
    print(f"igel version: {installed}")


@cli.command(context_settings=CONTEXT_SETTINGS)
def info():
"""get info & metadata about igel"""
Expand All @@ -247,4 +311,3 @@ def info():
operating system: independent
"""
)

3 changes: 3 additions & 0 deletions igel/cnn/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .cnn import IgelCNN

__all__ = ["IgelCNN"]
135 changes: 135 additions & 0 deletions igel/cnn/cnn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
import json
import logging
import os

import autokeras as ak
import numpy as np
import pandas as pd
from igel.cnn.defaults import Defaults
from igel.cnn.models import Models
from igel.constants import Constants
from igel.utils import read_json, read_yaml
from tensorflow.keras.preprocessing import image

logger = logging.getLogger(__name__)


class IgelCNN:
    """
    Runner for igel's autokeras-based commands.

    Instantiating the class parses the CLI keyword arguments, loads either the
    user configuration (for ``train``) or the stored description file (for any
    other command) and then immediately calls the method named by ``cmd``.
    """

    defaults = Defaults()
    # Placeholders for features/targets and the trained model.
    x = None
    y = None
    model = None
    results_path = Constants.results_dir

    def __init__(self, **cli_args):
        """
        Parse the CLI arguments and execute the requested command.

        Recognised keys in ``cli_args``: ``cmd``, ``data_path``, ``yaml_path``
        and, for commands other than ``train``, the optional ``model_path``,
        ``prediction_file`` and ``description_file``.

        Raises:
            Exception: if ``cmd`` is ``"train"`` and the configuration file is
                neither a ``.yaml`` nor a ``.json`` file.
            AttributeError: if this class has no callable named ``cmd``.
        """
        self.cmd: str = cli_args.get("cmd")
        self.data_path: str = cli_args.get("data_path")
        self.config_path: str = cli_args.get("yaml_path")
        logger.info(f"Executing command: {self.cmd}")
        logger.info(f"Reading data from: {self.data_path}")
        logger.info(f"Reading yaml configs from: {self.config_path}")

        if self.cmd == "train":
            # splitext only inspects the final suffix, so paths such as
            # "./conf/igel.yaml" or "my.model.json" resolve to the right
            # extension (indexing split(".")[1] picked the wrong piece).
            self.file_ext: str = os.path.splitext(self.config_path)[1][1:]

            if self.file_ext not in ("yaml", "json"):
                raise Exception(
                    "Configuration file can be a yaml or a json file!"
                )

            if self.file_ext == "json":
                self.configs: dict = read_json(self.config_path)
            else:
                self.configs: dict = read_yaml(self.config_path)

            self.dataset_props: dict = self.configs.get(
                "dataset", self.defaults.dataset_props
            )
            self.model_props: dict = self.configs.get(
                "model", self.defaults.model_props
            )
            self.target: list = self.configs.get("target")
            self.model_type = self.model_props.get("type")
            self.model_args = self.model_props.get("arguments")

        else:
            self.model_path = cli_args.get(
                "model_path", self.defaults.model_path
            )
            logger.info(f"path of the pre-fitted model => {self.model_path}")
            self.prediction_file = cli_args.get(
                "prediction_file", self.defaults.prediction_file
            )
            # use the provided description.json, or fall back to the default
            self.description_file = cli_args.get(
                "description_file", self.defaults.description_file
            )
            # load description file to read stored training parameters
            with open(self.description_file) as f:
                dic = json.load(f)
            # target to predict as a list
            self.target: list = dic.get("target")
            # type of the model
            self.model_type: str = dic.get("type")
            # dataset props entered while fitting
            self.dataset_props: dict = dic.get("dataset_props")

        # Dispatch to the method named after the command. Fail with an
        # explicit message when the command has no implementation here.
        handler = getattr(self, self.cmd, None)
        if not callable(handler):
            raise AttributeError(
                f"{type(self).__name__} does not implement the "
                f"'{self.cmd}' command"
            )
        handler()

    def _create_model(self, *args, **kwargs):
        """
        Instantiate the autokeras model selected by ``self.model_type``.

        The model is built with ``self.model_args`` as keyword arguments when
        they are provided, otherwise with the class defaults. Positional and
        keyword arguments passed to this method are accepted but unused.
        """
        model_cls = Models.get(self.model_type)
        if self.model_args:
            return model_cls(**self.model_args)
        return model_cls()

    def _convert_img_to_np_array(self, paths):
        """
        Load every image in ``paths`` as grayscale and stack them in an array.

        Returns:
            A numpy array built from the per-image arrays, in input order.
        """
        logger.info("Reading images and converting them to arrays...")
        # color_mode="grayscale" is the non-deprecated spelling of the old
        # grayscale=True flag and loads the image the same way.
        return np.array(
            [
                np.asarray(image.load_img(path, color_mode="grayscale"))
                for path in paths
            ]
        )

    def _read_dataset(self):
        """
        Load the training split of the image dataset under ``self.data_path``.

        The data is read with ``ak.image_dataset_from_directory`` using the
        "training" subset of a 0.2 validation split and a fixed seed of 42.
        """
        # NOTE(review): presumably the directory holds one sub-folder per
        # class, as the autokeras loader expects - confirm against its docs.
        train_data = ak.image_dataset_from_directory(
            self.data_path, subset="training", validation_split=0.2, seed=42
        )
        return train_data

    def save_model(self, model):
        """
        Export the best model found by autokeras and write it to disk.

        The model is saved as ``model`` in the TensorFlow format; if that
        fails it is saved as ``model.h5`` instead.

        Returns:
            True once the model has been written by either path. An error
            raised by the fallback save propagates to the caller.
        """
        exp_model = model.export_model()
        logger.info(f"model type: {type(exp_model)}")
        try:
            exp_model.save("model", save_format="tf")
        except Exception:
            # Broad on purpose: any failure of the TensorFlow format falls
            # back to HDF5. Keep the traceback so the cause stays visible.
            logger.warning(
                "saving in the tf format failed, falling back to model.h5",
                exc_info=True,
            )
            exp_model.save("model.h5")
        # Report success for the fallback as well, so the caller's
        # confirmation message is logged whenever a model was written.
        return True

    def train(self):
        """Read the dataset, build the configured model, fit it and save it."""
        train_data = self._read_dataset()
        self.model = self._create_model()
        logger.info(f"executing a {self.model.__class__.__name__} algorithm...")
        logger.info("Training started...")
        self.model.fit(train_data)
        saved = self.save_model(self.model)
        if saved:
            logger.info("model saved successfully")
21 changes: 20 additions & 1 deletion igel/cnn/defaults.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,21 @@
from igel.configs import configs


class Defaults:
    """Default values used by the auto-ml (CNN) runner."""

    # Fallbacks for the "dataset" and "model" sections of a configuration.
    dataset_props = {}
    model_props = {}

    # NOTE(review): these tuples list "fit" and classic task types, while the
    # auto-ml CLI dispatches "train" - confirm whether they are still used.
    available_commands = ("fit", "evaluate", "predict", "experiment")
    supported_types = ("regression", "classification", "clustering")

    # path to the results folder
    results_path = configs.get("results_path")
    # path to the pre-fitted model
    model_path = configs.get("default_model_path")
    # path to the description.json file
    description_file = configs.get("description_file")
    # path to the evaluation.json file
    evaluation_file = configs.get("evaluation_file")
    # path to the predictions.csv
    prediction_file = configs.get("prediction_file")
9 changes: 9 additions & 0 deletions igel/cnn/example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
import autokeras as ak
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import load_model

# Minimal autokeras example: load MNIST and fit an image classifier on the
# training split. The test split is loaded but not used below.
# NOTE(review): this runs at import time (no __main__ guard) - confirm that
# the module is only ever executed as a script.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Default-configured classifier; fit() is called with the training data only.
cls = ak.ImageClassifier()
cls.fit(x_train, y_train)
9 changes: 0 additions & 9 deletions igel/cnn/model.py

This file was deleted.

39 changes: 39 additions & 0 deletions igel/cnn/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import autokeras as ak


class Models:
    """Registry mapping igel auto-ml task names to autokeras model classes."""

    # Each entry holds the autokeras class and a link to its documentation.
    models_map = {
        "ImageClassification": {
            "class": ak.ImageClassifier,
            "link": "https://autokeras.com/image_classifier/",
        },
        "ImageRegression": {
            "class": ak.ImageRegressor,
            "link": "https://autokeras.com/image_regressor/",
        },
        "TextClassification": {
            "class": ak.TextClassifier,
            "link": "https://autokeras.com/text_classifier/",
        },
        "TextRegression": {
            "class": ak.TextRegressor,
            "link": "https://autokeras.com/text_regressor/",
        },
        "StructuredDataClassification": {
            "class": ak.StructuredDataClassifier,
            "link": "https://autokeras.com/structured_data_classifier/",
        },
        "StructuredDataRegression": {
            "class": ak.StructuredDataRegressor,
            "link": "https://autokeras.com/structured_data_regressor/",
        },
    }

    @classmethod
    def get(cls, model_type: str, *args, **kwargs):
        """
        Return the autokeras class registered for ``model_type``.

        Extra positional and keyword arguments are accepted but unused.

        Raises:
            Exception: if ``model_type`` is not a key of ``models_map``; the
                message lists the supported task names.
        """
        entry = cls.models_map.get(model_type)
        if entry is None:
            # Join the keys so the message shows plain task names instead of
            # the repr of a dict_keys view.
            supported = ", ".join(cls.models_map)
            raise Exception(
                f"{model_type} is not supported! "
                f"Choose one of the following supported tasks: {supported}"
            )
        return entry["class"]
1 change: 1 addition & 0 deletions igel/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ class Constants:
description_file = "description.json"
prediction_file = "predictions.csv"
stats_dir = "model_results"
results_dir = "model_results"
init_file = "igel.yaml"
post_req_data_file = "post_req_data.csv"
evaluation_file = "evaluation.json"
Expand Down
Loading

0 comments on commit fd6e7a9

Please sign in to comment.