Skip to content

Commit 3207627

Browse files
author
kelvin.heng
committed
initial commit
0 parents  commit 3207627

14 files changed

+746
-0
lines changed

.gitignore

+130
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
# Byte-compiled / optimized / DLL files
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
6+
# C extensions
7+
*.so
8+
9+
# Distribution / packaging
10+
.Python
11+
build/
12+
develop-eggs/
13+
dist/
14+
downloads/
15+
eggs/
16+
.eggs/
17+
lib/
18+
lib64/
19+
parts/
20+
sdist/
21+
var/
22+
wheels/
23+
pip-wheel-metadata/
24+
share/python-wheels/
25+
*.egg-info/
26+
.installed.cfg
27+
*.egg
28+
MANIFEST
29+
30+
# PyInstaller
31+
# Usually these files are written by a python script from a template
32+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
33+
*.manifest
34+
*.spec
35+
36+
# Installer logs
37+
pip-log.txt
38+
pip-delete-this-directory.txt
39+
40+
# Unit test / coverage reports
41+
htmlcov/
42+
.tox/
43+
.nox/
44+
.coverage
45+
.coverage.*
46+
.cache
47+
nosetests.xml
48+
coverage.xml
49+
*.cover
50+
*.py,cover
51+
.hypothesis/
52+
.pytest_cache/
53+
54+
# Translations
55+
*.mo
56+
*.pot
57+
58+
# Django stuff:
59+
*.log
60+
local_settings.py
61+
db.sqlite3
62+
db.sqlite3-journal
63+
64+
# Flask stuff:
65+
instance/
66+
.webassets-cache
67+
68+
# Scrapy stuff:
69+
.scrapy
70+
71+
# Sphinx documentation
72+
docs/_build/
73+
74+
# PyBuilder
75+
target/
76+
77+
# Jupyter Notebook
78+
.ipynb_checkpoints
79+
80+
# IPython
81+
profile_default/
82+
ipython_config.py
83+
84+
# pyenv
85+
.python-version
86+
87+
# pipenv
88+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
90+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
91+
# install all needed dependencies.
92+
#Pipfile.lock
93+
94+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
95+
__pypackages__/
96+
97+
# Celery stuff
98+
celerybeat-schedule
99+
celerybeat.pid
100+
101+
# SageMath parsed files
102+
*.sage.py
103+
104+
# Environments
105+
.env
106+
.venv
107+
env/
108+
venv/
109+
ENV/
110+
env.bak/
111+
venv.bak/
112+
113+
# Spyder project settings
114+
.spyderproject
115+
.spyproject
116+
117+
# Rope project settings
118+
.ropeproject
119+
120+
# mkdocs documentation
121+
/site
122+
123+
# mypy
124+
.mypy_cache/
125+
.dmypy.json
126+
dmypy.json
127+
128+
# Pyre type checker
129+
.pyre/
130+
.DS_Store

Dockerfile

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Image for the MPG prediction service: Flask app in main.py served by gunicorn.
FROM python:3.7

WORKDIR /app

# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir pandas scikit-learn flask gunicorn

# COPY instead of ADD: these are plain local paths, so ADD's extra behaviour
# (remote URLs, automatic archive extraction) is not wanted here.
COPY ./core ./core
COPY ./models ./models
COPY main.py main.py

EXPOSE 5000

# Bind to all interfaces on the exposed port; "main:app" is the app object in main.py.
CMD [ "gunicorn", "--bind", "0.0.0.0:5000", "main:app" ]

LICENSE

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2020 Casper Bøgeskov Hansen
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

README.md

+18
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Predicting Miles Per Gallon
2+
3+
Just a sample ML application on a toy dataset wrapped in a Docker container.
4+
5+
An example of the input to the Flask service in main.py:
6+
7+
```json
8+
{
9+
"cylinders": 8,
10+
"displacement": 307.0,
11+
"horsepower": 130.0,
12+
"weight": 3504,
13+
"acceleration": 12.0,
14+
"model_year": 70,
15+
"origin": 1,
16+
"car_name": "chevrolet chevelle malibu"
17+
}
18+
```

build_push_image.sh

+12
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#!/bin/bash
# Build the Docker image from the current directory, tag it for the registry
# and push it.

# Stop at the first failing command so a broken build is never tagged or pushed.
set -euo pipefail

ADDRESS=gcr.io
PROJECT_ID=ml-docker-kubernetes
REPOSITORY=auto
VERSION=1.0.0

LOCAL_TAG="${PROJECT_ID}:${VERSION}"
REMOTE_TAG="${ADDRESS}/${PROJECT_ID}/${REPOSITORY}:${VERSION}"

docker build -t "${LOCAL_TAG}" .

# Tag by name rather than by an image ID grepped out of `docker images`:
# the grep could match an unrelated or older image, and it needed sudo while
# the other docker commands in this script did not.
docker tag "${LOCAL_TAG}" "${REMOTE_TAG}"

docker push "${REMOTE_TAG}"

core/clean_data.py

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
2+
class CleanData():
    """Cleaning helpers for DataFrames that follow the auto-mpg column layout."""

    def __init__(self):
        pass

    def clear_question_marks(self, df):
        """Remove rows with a '?' horsepower and convert the column to float.

        Args:
            df: DataFrame with a 'horsepower' column.

        Returns:
            A new DataFrame without the rows whose 'horsepower' equals '?',
            with 'horsepower' cast to float. The input frame is not modified.
        """
        df = df[df['horsepower'] != '?']
        # astype returns a new frame instead of converting in place, so the
        # result has to be assigned back for the cast to take effect.
        df = df.astype({"horsepower": float})

        return df

    def drop_unused_columns(self, df):
        """Return a copy of df without the 'mpg' and 'car_name' columns."""
        return df.drop(['mpg', 'car_name'], axis=1)

    def drop_car_name(self, df):
        """Return a copy of df without the 'car_name' column."""
        return df.drop(['car_name'], axis=1)

core/load_data.py

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import pandas as pd
2+
import joblib
3+
4+
class LoadData():
    """Load the auto-mpg dataset, persisted models and JSON records."""

    def __init__(self):
        # Column names assigned to the header-less dataset file, in file order.
        self.columns = [
            'mpg',
            'cylinders',
            'displacement',
            'horsepower',
            'weight',
            'acceleration',
            'model_year',
            'origin',
            'car_name',
        ]

    def load_dataset_as_df(self, path="data/auto-mpg.data"):
        """Read the whitespace-delimited dataset file into a DataFrame.

        Args:
            path: Location of the data file. Defaults to the path that was
                previously hard-coded, so existing callers are unaffected.

        Returns:
            DataFrame whose columns are named after ``self.columns``.
        """
        # A whitespace regex separator replaces delim_whitespace=True, which
        # recent pandas releases deprecate; the parsing result is the same.
        df = pd.read_table(path, header=None, sep=r"\s+")
        df.columns = self.columns

        return df

    def load_model_from_path(self, path):
        """Return the object stored at ``path``, loaded with joblib."""
        return joblib.load(path)

    def json_to_df(self, json):
        """Wrap one record (a mapping of column name to value) in a one-row DataFrame."""
        df = pd.DataFrame(json, index=[0])

        return df

core/predict_data.py

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
from sklearn.metrics import r2_score
2+
3+
class PredictData():
    """Run predictions with a model and keep the latest predictions and score."""

    def __init__(self, model=None):
        # Default model used by predict() when no per-call model is given.
        self.model = model
        # Output of the most recent predict() call.
        self.pred = []
        # Result of the most recent score_r2() call.
        self.score = None

    def predict(self, X_test, model=None):
        """Predict for ``X_test`` and store the result in ``self.pred``.

        Args:
            X_test: Input passed unchanged to the model's ``predict`` method.
            model: Optional model used for this call instead of ``self.model``.

        Returns:
            Whatever the chosen model's ``predict`` returns.
        """
        # Identity check: "!= None" would go through the model's own __ne__
        # and could wrongly ignore an explicitly passed model.
        if model is not None:
            self.pred = model.predict(X_test)
        else:
            self.pred = self.model.predict(X_test)

        return self.pred

    def score_r2(self, y_test):
        """Score the stored predictions against ``y_test`` with ``r2_score``.

        The result is stored in ``self.score`` and returned. It uses
        ``self.pred``, so predict() is expected to have been called first.
        """
        self.score = r2_score(y_test, self.pred)
        return self.score

0 commit comments

Comments
 (0)