Skip to content

Commit

Permalink
seventh commit
Browse files Browse the repository at this point in the history
  • Loading branch information
deadsoul44 committed Jun 7, 2024
1 parent 2da3358 commit e34ec03
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 4 deletions.
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ Prior to running the tests, you should install `python-package` in editable mode
cd python-package
# Install the project in editable mode and all development dependencies
python -m pip install -e .[dev]
# You can now return to the rood directory and run the tests...
# You can now return to the root directory and run the tests...
cd ..

# Prior to running the tests, build all required test artifacts
Expand Down
7 changes: 4 additions & 3 deletions python-package/python/perpetual/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,11 +144,13 @@ def convert_input_frame(X: FrameLike, categorical_features) -> tuple[list[str],
Returns:
tuple[list[str], np.ndarray, int, int, Optional[Iterable[int]], Optional[dict]]: Return column names, the flat data, number of rows, the number of columns, cat_index, cat_mapping
"""
categorical_features_ = None
if isinstance(X, pd.DataFrame):
X_ = X.to_numpy()
features_ = X.columns.to_list()
if categorical_features == "auto":
categorical_features_ = [features_.index(c) for c in X.select_dtypes(include=['category']).columns.tolist()] or None
categorical_columns = X.select_dtypes(include=['category']).columns.tolist()
categorical_features_ = [features_.index(c) for c in categorical_columns] or None
else:
# Assume it's a numpy array.
X_ = X
Expand All @@ -158,8 +160,7 @@ def convert_input_frame(X: FrameLike, categorical_features) -> tuple[list[str],
categorical_features_ = categorical_features
elif categorical_features and all(isinstance(s, str) for s in categorical_features) and isinstance(categorical_features, list):
categorical_features_ = [features_.index(c) for c in categorical_features]
else:
categorical_features_ = None


cat_mapping = {} # key: feature_name, value: ordered category names
if categorical_features_:
Expand Down
8 changes: 8 additions & 0 deletions python-package/tests/test_booster.py
Original file line number Diff line number Diff line change
Expand Up @@ -730,3 +730,11 @@ def test_booster_saving_with_monotone_constraints(
save_func(model, f64_model_path)
model_loaded = load_func(f64_model_path)
assert all(preds == model_loaded.predict(X))

def test_categorical(X_y):
    """Smoke-test fitting a PerpetualBooster on pandas ``category`` columns.

    The adult test frame and its target are loaded from the resources
    directory, the listed columns are cast to the ``category`` dtype, and a
    default-constructed booster is fitted. There is no explicit assertion:
    the test passes when ``fit`` completes without raising.

    Note: the ``X_y`` argument is not used in the body.
    """
    categorical_columns = [
        'workclass',
        'education',
        'marital-status',
        'occupation',
        'relationship',
        'race',
        'native-country',
    ]

    # Paths are relative, so they resolve against the current working directory.
    features = pd.read_csv("../resources/adult_test_df.csv", index_col=False)
    target_frame = pd.read_csv("../resources/adult_test_y.csv", index_col=False, header=None)
    # The target file is read without a header; squeeze its columns axis
    # before converting to a NumPy array.
    target = np.array(target_frame.squeeze('columns'))

    features[categorical_columns] = features[categorical_columns].astype('category')

    booster = PerpetualBooster()
    booster.fit(features, target)
2 changes: 2 additions & 0 deletions scripts/make_resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,8 @@
# Flatten the training frame. Passing "auto" asks convert_input_frame to pick
# the categorical columns itself; it also returns the category mapping.
features_, adult_train_flat, rows, cols, categorical_features_, cat_mapping = convert_input_frame(data_train, "auto")
# Transform the test frame, passing in the category mapping obtained from the
# training frame above.
features_, adult_test_flat, rows, cols = transform_input_frame(data_test, cat_mapping)

# Save the test frame itself as a CSV without the index column.
data_test.to_csv("resources/adult_test_df.csv", index=False)

# Write the flat arrays and y_train as CSV files with neither index nor header.
pd.Series(adult_train_flat).to_csv("resources/adult_train_flat.csv", index=False, header=False)
pd.Series(adult_test_flat).to_csv("resources/adult_test_flat.csv", index=False, header=False)
pd.Series(y_train).to_csv("resources/adult_train_y.csv", index=False, header=False)
Expand Down

0 comments on commit e34ec03

Please sign in to comment.