updating recent files

XxRemsteelexX · Aug 2, 2024 · 25d27a0 · 25d27a0
1 parent b85646e
commit 25d27a0
Show file tree

Hide file tree

Showing 6 changed files with 69 additions and 21 deletions.
diff --git a/__pycache__/main.cpython-38.pyc b/__pycache__/main.cpython-38.pyc
diff --git a/__pycache__/test_ml.cpython-38-pytest-7.2.1.pyc b/__pycache__/test_ml.cpython-38-pytest-7.2.1.pyc
diff --git a/local_api.py b/local_api.py
@@ -1,14 +1,15 @@
+# Databricks notebook source
 import json
 
 import requests
 
 # TODO: send a GET using the URL http://127.0.0.1:8000
-r = # Your code here
+r = requests.get("http://127.0.0.1:8000")
 
 # TODO: print the status code
-# print()
+print("Status Code:", r.status_code)
 # TODO: print the welcome message
-# print()
+print("Result:", r.json())
 
 
 
@@ -30,9 +31,9 @@
 }
 
 # TODO: send a POST using the data above
-r = # Your code here
+r = requests.post("http://127.0.0.1:8000/data", data = json.dumps(data))
 
 # TODO: print the status code
-# print()
+print("Status Code:", r.status_code)
 # TODO: print the result
-# print()
+print("Result:", r.json())
diff --git a/main.py b/main.py
@@ -1,3 +1,4 @@
+# Databricks notebook source
 import os
 
 import pandas as pd
@@ -26,21 +27,21 @@ class Data(BaseModel):
     hours_per_week: int = Field(..., example=40, alias="hours-per-week")
     native_country: str = Field(..., example="United-States", alias="native-country")
 
-path = # TODO: enter the path for the saved encoder 
+path = './model/encoder.pkl'
 encoder = load_model(path)
 
-path = # TODO: enter the path for the saved model 
+path = './model/model.pkl'
 model = load_model(path)
 
 # TODO: create a RESTful API using FastAPI
-app = # your code here
+app = FastAPI()
 
 # TODO: create a GET on the root giving a welcome message
 @app.get("/")
 async def get_root():
     """ Say hello!"""
-    # your code here
-    pass
+    return {'Message': 'Welcome to the API!'}
+
 
 
 # TODO: create a POST on a different path that does model inference
@@ -69,6 +70,10 @@ async def post_inference(data: Data):
         # use data as data input
         # use training = False
         # do not need to pass lb as input
+        X = data,
+        categorical_features = cat_features,
+        training = False,
+        encoder = encoder
     )
-    _inference = # your code here to predict the result using data_processed
+    _inference = inference(model, data_processed)
     return {"result": apply_label(_inference)}
diff --git a/screenshots/unit_test.PNG b/screenshots/unit_test.PNG
diff --git a/test_ml.py b/test_ml.py
@@ -1,28 +1,70 @@
+# Databricks notebook source
+
 import pytest
+import os
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from ml.model import train_model
+from sklearn.ensemble import RandomForestClassifier
+from pathlib import Path
+
+
+
 # TODO: add necessary import
 
+
 # TODO: implement the first test. Change the function name and input as needed
-def test_one():
+def test_train_test_split_size():
     """
-    # add description for the first test
+    Check that a 20% test split of the census data has at least 2000 rows.
     """
     # Your code here
-    pass
+    data_path = './data/census.csv'
+    data = pd.read_csv(str(data_path))
+    train, test = train_test_split(data, test_size = 0.2)
+    assert len(test) >= 2000
+
 
 
 # TODO: implement the second test. Change the function name and input as needed
-def test_two():
+def test_column_names():
     """
-    # add description for the second test
+    Check that the census data has exactly the expected set of column names.
     """
     # Your code here
-    pass
+    data_path = './data/census.csv'
+    data = pd.read_csv(data_path)
+
+    features = {
+        'age',
+        'workclass',
+        'fnlgt',
+        'education',
+        'education-num',
+        'marital-status',
+        'occupation',
+        'relationship',
+        'race',
+        'sex',
+        'capital-gain',
+        'capital-loss',
+        'hours-per-week',
+        'native-country',
+        'salary'
+    }
+
+    assert set(data.columns) == features
 
 
 # TODO: implement the third test. Change the function name and input as needed
-def test_three():
+def test_model_type():
     """
-    # add description for the third test
+    Check that train_model returns a RandomForestClassifier instance.
     """
     # Your code here
-    pass
+    sample_x = [[0, 1, 2], [3, 4, 5]]
+    sample_y = ['col1', 'col2']
+
+    model = train_model(sample_x, sample_y)
+
+    assert isinstance(model, RandomForestClassifier)