kelvinheng92
diff --git a/‎.gitignore
+130 b/‎.gitignore
+130
diff --git a/‎Dockerfile
+13 b/‎Dockerfile
+13
diff --git a/‎LICENSE
+21 b/‎LICENSE
+21
diff --git a/‎README.md
+18 b/‎README.md
+18
diff --git a/‎build_push_image.sh
+12 b/‎build_push_image.sh
+12
diff --git a/‎core/clean_data.py
+16 b/‎core/clean_data.py
+16
diff --git a/‎core/load_data.py
+28 b/‎core/load_data.py
+28
diff --git a/‎core/predict_data.py
+19 b/‎core/predict_data.py
+19
@@ -0,0 +1,130 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+.DS_Store
@@ -0,0 +1,13 @@
+# Image for serving the Flask app in main.py with gunicorn.
+FROM python:3.7
+
+WORKDIR /app
+
+# joblib is imported directly by core/load_data.py, so install it explicitly
+# instead of relying on it arriving as a transitive dependency.
+# --no-cache-dir keeps pip's download cache out of the image layer.
+RUN pip install --no-cache-dir pandas scikit-learn flask gunicorn joblib
+
+# COPY is the recommended instruction for plain local files and directories;
+# ADD's extra behaviours (URL fetch, archive extraction) are not needed here.
+COPY ./core ./core
+COPY ./models ./models
+COPY main.py main.py
+
+# Port that gunicorn binds to below.
+EXPOSE 5000
+
+CMD [ "gunicorn", "--bind", "0.0.0.0:5000", "main:app" ]
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2020 Casper Bøgeskov Hansen
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
@@ -0,0 +1,18 @@
+# Predicting Miles Per Gallon
+
+Just a sample ML application on a toy dataset wrapped in a Docker container.
+
+An example of the JSON input expected by the Flask service in main.py:
+
+```json
+{
+    "cylinders": 8,
+    "displacement": 307.0,
+    "horsepower": 130.0,
+    "weight": 3504,
+    "acceleration": 12.0,
+    "model_year": 70,
+    "origin": 1,
+    "car_name": "chevrolet chevelle malibu"
+}
+```
@@ -0,0 +1,12 @@
+#!/bin/bash
+# Build the Docker image from the current directory, tag it for the remote
+# registry, and push it.
+
+# Stop on the first failing command, on unset variables, and on pipeline
+# errors, so a failed build is never tagged or pushed.
+set -euo pipefail
+
+ADDRESS=gcr.io
+PROJECT_ID=ml-docker-kubernetes
+REPOSITORY=auto
+VERSION=1.0.0
+
+LOCAL_IMAGE="${PROJECT_ID}:${VERSION}"
+REMOTE_IMAGE="${ADDRESS}/${PROJECT_ID}/${REPOSITORY}:${VERSION}"
+
+docker build -t "${LOCAL_IMAGE}" .
+
+# Tag by name instead of grepping `docker images` for an ID: the grep could
+# match any image whose name contains the project id, and it required sudo
+# while the other docker commands in this script do not.
+docker tag "${LOCAL_IMAGE}" "${REMOTE_IMAGE}"
+
+docker push "${REMOTE_IMAGE}"
@@ -0,0 +1,16 @@
+
+class CleanData():
+    """Stateless helpers that clean a pandas DataFrame before modelling.
+
+    Every method returns a new DataFrame and leaves its argument untouched.
+    """
+
+    def clear_question_marks(self, df):
+        """Drop rows with a '?' horsepower and convert the column to float.
+
+        Args:
+            df: DataFrame with a 'horsepower' column.
+
+        Returns:
+            A new DataFrame without the rows whose 'horsepower' equals '?',
+            and with 'horsepower' cast to float.
+
+        Raises:
+            ValueError: if a remaining 'horsepower' value cannot be
+                converted to float.
+        """
+        known = df[df['horsepower'] != '?']
+        # astype returns a new frame; the result must be returned, otherwise
+        # the column silently keeps its original dtype.
+        return known.astype({"horsepower": float})
+
+    def drop_unused_columns(self, df):
+        """Return a copy of df without the 'mpg' and 'car_name' columns."""
+        return df.drop(['mpg', 'car_name'], axis=1)
+
+    def drop_car_name(self, df):
+        """Return a copy of df without the 'car_name' column."""
+        return df.drop(['car_name'], axis=1)
@@ -0,0 +1,28 @@
+import pandas as pd
+import joblib
+
+class LoadData():
+    """Loaders for the dataset, a persisted model, and JSON request payloads."""
+
+    def __init__(self):
+        # Column names, in file order, applied to the header-less dataset.
+        self.columns = ['mpg',
+                        'cylinders',
+                        'displacement',
+                        'horsepower',
+                        'weight',
+                        'acceleration',
+                        'model_year',
+                        'origin',
+                        'car_name']
+
+    def load_dataset_as_df(self, path="data/auto-mpg.data"):
+        """Read the whitespace-delimited dataset into a DataFrame.
+
+        Args:
+            path: Location of the data file. Defaults to the path the
+                project has always used, relative to the working directory.
+
+        Returns:
+            DataFrame whose columns are named after ``self.columns``.
+
+        Raises:
+            ValueError: if the file does not have exactly
+                ``len(self.columns)`` columns.
+        """
+        # sep=r"\s+" is the documented equivalent of delim_whitespace=True,
+        # which is deprecated in recent pandas releases.
+        df = pd.read_table(path, header=None, sep=r"\s+")
+        df.columns = self.columns
+
+        return df
+
+    def load_model_from_path(self, path):
+        """Return the object that joblib deserializes from ``path``."""
+        # NOTE: joblib.load unpickles the file and can execute arbitrary
+        # code; only load model files from a trusted source.
+        return joblib.load(path)
+
+    def json_to_df(self, json):
+        """Wrap one mapping of scalar values in a single-row DataFrame.
+
+        Args:
+            json: Mapping of column name to scalar value.
+
+        Returns:
+            DataFrame with one row (index 0) and one column per key.
+        """
+        return pd.DataFrame(json, index=[0])
@@ -0,0 +1,19 @@
+from sklearn.metrics import r2_score
+
+class PredictData():
+    """Run predictions with a model and score them with R^2.
+
+    Attributes are documented inline in ``__init__``.
+    """
+
+    def __init__(self, model = None):
+        # Default model used by predict() when none is passed per call.
+        self.model = model
+        # Output of the most recent predict() call ([] before the first).
+        self.pred = []
+        # Result of the most recent score_r2() call (None before the first).
+        self.score = None
+
+    def predict(self, X_test, model = None):
+        """Predict for X_test and remember the result in ``self.pred``.
+
+        Args:
+            X_test: Input passed unchanged to the model's ``predict``.
+            model: Optional model that overrides ``self.model`` for this
+                call only.
+
+        Returns:
+            Whatever the chosen model's ``predict`` returns.
+
+        Raises:
+            AttributeError: if no model was given here or at construction.
+        """
+        # Identity check: `!= None` would call the object's own comparison
+        # operators, which may be overridden or may not return a plain bool.
+        chosen = model if model is not None else self.model
+        self.pred = chosen.predict(X_test)
+
+        return self.pred
+
+    def score_r2(self, y_test):
+        """Store and return r2_score of y_test against the last prediction."""
+        self.score = r2_score(y_test, self.pred)
+        return self.score