-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
b85646e
commit 25d27a0
Showing
6 changed files
with
69 additions
and
21 deletions.
There are no files selected for viewing
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,28 +1,70 @@ | ||
# Databricks notebook source | ||
|
||
import pytest | ||
import os | ||
import pandas as pd | ||
from sklearn.model_selection import train_test_split | ||
from ml.model import train_model | ||
from sklearn.ensemble import RandomForestClassifier | ||
from pathlib import Path | ||
|
||
|
||
|
||
# TODO: add necessary import | ||
|
||
|
||
# TODO: implement the first test. Change the function name and input as needed | ||
def test_one(): | ||
def test_train_test_split_size(): | ||
""" | ||
# add description for the first test | ||
checking that the sliced data is ready for testing | ||
""" | ||
# Your code here | ||
pass | ||
data_path = './data/census.csv' | ||
data = pd.read_csv(str(data_path)) | ||
train, test = train_test_split(data, test_size = 0.2) | ||
assert len(test) >= 2000 | ||
|
||
|
||
|
||
# TODO: implement the second test. Change the function name and input as needed | ||
def test_two(): | ||
def test_column_names(): | ||
""" | ||
# add description for the second test | ||
testing that all features are in the data | ||
""" | ||
# Your code here | ||
pass | ||
data_path = './data/census.csv' | ||
data = pd.read_csv(data_path) | ||
|
||
features = { | ||
'age', | ||
'workclass', | ||
'fnlgt', | ||
'education', | ||
'education-num', | ||
'marital-status', | ||
'occupation', | ||
'relationship', | ||
'race', | ||
'sex', | ||
'capital-gain', | ||
'capital-loss', | ||
'hours-per-week', | ||
'native-country', | ||
'salary' | ||
} | ||
|
||
assert set(data.columns) == features | ||
|
||
|
||
# TODO: implement the third test. Change the function name and input as needed | ||
def test_three(): | ||
def test_model_type(): | ||
""" | ||
# add description for the third test | ||
testing that the model is random forest classifier | ||
""" | ||
# Your code here | ||
pass | ||
sample_x = [[0, 1, 2], [3, 4, 5]] | ||
sample_y = ['col1', 'col2'] | ||
|
||
model = train_model(sample_x, sample_y) | ||
|
||
assert isinstance(model, RandomForestClassifier) |