-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpd_download.py
60 lines (34 loc) · 1.66 KB
/
pd_download.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import pandas as pd
import warnings
# -------------------------------------------------Settings--------------------------------------------------------------------
# Raise pandas' display limits (console width, column count, row count) to 30000.
pd.options.display.width = 30000
pd.options.display.max_columns = 30000
pd.options.display.max_rows = 30000
# Format floats with zero decimal places when pandas renders them.
pd.options.display.float_format = lambda value: "%.0f" % value
# Install a global filter that ignores all warnings.
warnings.filterwarnings("ignore")
# ---------------------------------------------------Data Downloading----------------------------------------------------------
def data_download_sas(file_path):
    """Read the SAS file at ``file_path`` with ``pd.read_sas`` and return the result."""
    return pd.read_sas(file_path)
def _decode_if_bytes(value):
    """Return ``value`` decoded as UTF-8 when it is ``bytes``; otherwise unchanged."""
    return value.decode("utf-8") if isinstance(value, bytes) else value


def data_cleaning(file_path):
    """Load a SAS file and split it into cleaned categorical and float frames.

    The file is read through ``data_download_sas``. Object-dtype columns have
    every ``bytes`` cell decoded as UTF-8, then the label ``'Others'`` is
    recoded to ``'OT'`` in ``PRODUCT`` and to ``'RS'`` in ``NAT``.

    Args:
        file_path: Location of the SAS file, passed straight to
            ``data_download_sas``.

    Returns:
        A 3-tuple ``(data_types, df_cat, df_float)``:
        ``data_types`` is the ``dtypes`` of the full frame, ``df_cat`` holds
        the cleaned object-dtype columns and ``df_float`` the float columns.

    Raises:
        KeyError: If ``PRODUCT`` or ``NAT`` is not among the object columns.
        UnicodeDecodeError: If a ``bytes`` cell is not valid UTF-8.
    """
    dataframe = data_download_sas(file_path)
    data_types = dataframe.dtypes

    # Work on an explicit copy so the column assignments below never write
    # through to (or warn about) the frame that was loaded.
    df_cat = dataframe.select_dtypes(include=object).copy()
    df_float = dataframe.select_dtypes(include=float)

    # Decode one column at a time. The previous per-cell loop issued a
    # whole-frame ``replace`` for every bytes cell, which is quadratic in the
    # number of cells; a single pass per column produces the same values.
    for column in df_cat.columns:
        # ``astype(object)`` keeps the column's object dtype even when ``map``
        # would infer another one (e.g. for an all-NaN column).
        df_cat[column] = df_cat[column].map(_decode_if_bytes).astype(object)

    df_cat['PRODUCT'] = df_cat['PRODUCT'].replace('Others', 'OT')
    df_cat['NAT'] = df_cat['NAT'].replace('Others', 'RS')
    return data_types, df_cat, df_float
# ---------------------------------------------------------Testing-----------------------------------------------------------------------
# if __name__ == "__main__":
# file_path = "./KGB.sas7bdat"
# data_types, df_loan_categorical, df_loan_float = data_cleaning(file_path)
# print(df_loan_categorical)