This repository has been archived by the owner on Feb 27, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathcli.py
123 lines (110 loc) · 3.77 KB
/
cli.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import click
from hana_automl.automl import AutoML
from hana_automl.pipeline.input import Input
import numpy as np
def _split_column_names(raw):
    """Turn a comma-separated option value into a list of column names.

    Returns ``None`` when the option was omitted or contains no names, so
    the "not provided" case stays distinguishable from a real list.
    """
    if raw is None:
        return None
    names = [part.strip() for part in raw.split(",") if part.strip()]
    return names or None


@click.command()
@click.option("-i", help="Path or URL of file to be processed.")
@click.option("--target", help="Column or variable to be predicted")
@click.option("--table", default=None, help="Name of existing table created in HANA")
@click.option("--rm_columns", help="Columns in the dataframe to be removed")
@click.option("--categorical", help="Categorical features or columns in the dataframe")
@click.option("--steps", default=10, help="Specify the number of iterations")
@click.option("--id_column", default=None, help="ID column in table")
@click.option(
    "--optimizer",
    default="BayesianOptimizer",
    help="Optimizer that will find the best algorithm",
)
# NOTE(review): without is_flag=True this option expects an explicit value
# (e.g. `--wizard true`). Switching to a real flag would be friendlier but
# changes the command line, so it is left as-is here - confirm with users.
@click.option("--wizard", default=False, help="Interactive mode. Best for beginners")
def start(
    i, target, rm_columns, categorical, id_column, optimizer, steps, wizard, table
):
    """Run automated machine learning on a file or an existing HANA table.

    Pass several columns to --rm_columns or --categorical as a
    comma-separated list, for example: --rm_columns "age,name".

    When --wizard is enabled every other option is ignored and the
    interactive wizard collects the settings instead.
    """
    # Click shows the docstring above in `--help`, so it is kept as plain
    # prose rather than a parameter-by-parameter listing.
    if wizard:
        wizard_mode()
        return

    automl = AutoML()
    automl.fit(
        file_path=i,
        target=target,
        # Click delivers these two options as a single string; the wizard
        # path passes lists of column names, so split them here to hand the
        # same shape to AutoML.fit from both entry points.
        columns_to_remove=_split_column_names(rm_columns),
        categorical_features=_split_column_names(categorical),
        id_column=id_column,
        optimizer=optimizer,
        steps=steps,
        table_name=table,
    )
def _print_indexed_columns(columns):
    """Print each column name prefixed with its positional index."""
    for position, name in enumerate(columns):
        print(f"[{position}]", name)


def _columns_from_indices(columns, raw):
    """Map a comma-separated string of indices to the matching column names.

    Blank entries (including a completely empty answer) are skipped, so an
    empty input yields an empty list instead of failing on ``int("")``.
    Non-numeric or out-of-range entries still raise ValueError / IndexError.
    """
    return [columns[int(token)] for token in raw.split(",") if token.strip()]


def wizard_mode():
    """Interactively collect AutoML settings from stdin and start the fit.

    The wizard loads the dataframe through ``Input.download_data``, asks for
    the target column, the ID column, optional columns to remove, the
    categorical columns and an optional existing table name, then calls
    ``AutoML().fit`` with the collected values.

    Raises:
        ValueError: if an index answer is not an integer.
        IndexError: if an index answer is outside the column range.
    """
    file_path = input(
        "Welcome to the wizard mode! It will guide you through the whole AutoML process. Let's start with an input "
        "file. It can be a URl or a path. Press [Enter] to proceed with default file: "
    )
    df = Input.download_data("data/train.csv" if file_path == "" else file_path)
    print(f"Here's your dataframe: \n{df.head()}")

    print("Its columns:")
    _print_indexed_columns(df.columns)
    # Only a single index is accepted here, even though the prompt text
    # mentions "column(s)".
    target = df.columns[
        int(
            input(
                "Enter number of column(s) that you want to predict. (Example: 3,4 or 5): "
            )
        )
    ]

    print("HANA needs ID column to work properly")
    _print_indexed_columns(df.columns)
    id_column = df.columns[
        int(input("Enter number of column to set as ID column. (Example: 3,4 or 5): "))
    ]

    # Stays None unless the user actually selects columns to drop.
    col_list = None
    rm_col = input("Do you want to remove any columns? y|n: ")
    # Accept "Y", "Yes", " y " and similar instead of only exact lowercase.
    if rm_col.strip().lower() in ("y", "yes"):
        print("Here are the columns:")
        _print_indexed_columns(df.columns)
        answer = input("Enter number of columns to delete. (Example: 1,4,5): ")
        chosen = _columns_from_indices(df.columns, answer)
        if chosen:
            col_list = chosen
            print(f"OK. Columns {col_list} will be deleted later.")

    print(
        "We've automatically detected categorical (string, date, object, etc) columns:"
    )
    for position, name in enumerate(df.columns):
        dtype = df[name].dtype
        print(
            f"[{position}]",
            "categ." if dtype == str or dtype == np.object_ else "normal",
            f"dtype: {dtype}",
            "name: ",
            name,
        )
    cat_col = input("Enter categorical columns to confirm. (Example: 3,4,5): ")
    # An empty answer means "no categorical columns"; pass None in that case,
    # which is also what the non-interactive CLI passes when the option is
    # omitted.
    cat_list = _columns_from_indices(df.columns, cat_col) or None

    table = input(
        "Great. Now we need to load your data to HANA. Enter name if you have existing table ([Enter] for "
        "none): "
    )
    # Pressing [Enter] yields "", which must not be used as a table name.
    table_name = table.strip() or None

    print("Starting automated machine learning...")
    automl = AutoML()
    automl.fit(
        df,
        target=target,
        table_name=table_name,
        columns_to_remove=col_list,
        categorical_features=cat_list,
        id_column=id_column,
    )
# Run the Click command only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    start()