-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathNn
67 lines (54 loc) · 2.4 KB
/
Nn
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, InputLayer
from tensorflow.keras.callbacks import EarlyStopping
# Train a small feed-forward binary classifier on a tabular CSV dataset:
# load -> split -> impute/scale/one-hot encode -> fit with early stopping ->
# report accuracy on the held-out test set.

# Load the dataset.
file_path = 'path/to/your/dataset.csv' # Update this path
df = pd.read_csv(file_path)

# 'DiagPeriodL90D' is the target variable; 'patient_id' is dropped as a
# non-feature column.
X = df.drop(['DiagPeriodL90D', 'patient_id'], axis=1)
y = df['DiagPeriodL90D']

# Hold out 20% of the rows for the final evaluation (fixed seed so the split
# is reproducible).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Partition the feature columns into numeric vs. everything else.
# Selecting by 'number' / not-'number' (instead of listing exact dtypes such
# as int64/float64/object/bool) guarantees every column lands in exactly one
# group. ColumnTransformer drops unlisted columns by default
# (remainder='drop'), so a column with e.g. int32, float32, category or a
# pandas string dtype would otherwise be silently discarded.
numerical_cols = X_train.select_dtypes(include='number').columns
categorical_cols = X_train.select_dtypes(exclude='number').columns

# Numeric features: fill missing values with the column mean, then standardize.
numerical_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler())
])

# Non-numeric features: fill missing values with the most frequent value, then
# one-hot encode. Categories unseen during fit are encoded as all zeros.
categorical_pipeline = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Combine both pipelines.
# sparse_threshold=0 forces a dense ndarray result. With the default (0.3),
# ColumnTransformer returns a SciPy sparse matrix whenever the one-hot block
# makes the stacked output sparse enough, and Keras' `validation_split`
# cannot slice SciPy sparse input in some versions.
# NOTE(review): dense output costs more memory for very high-cardinality
# categorical columns -- confirm this is acceptable for the real dataset.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_pipeline, numerical_cols),
        ('cat', categorical_pipeline, categorical_cols)
    ],
    sparse_threshold=0
)

# Fit the preprocessing on the training data only, then apply the fitted
# transform to the test data so no test statistics leak into training.
X_train_processed = preprocessor.fit_transform(X_train)
X_test_processed = preprocessor.transform(X_test)

# Neural network architecture: two hidden ReLU layers and a single sigmoid
# output unit for binary classification.
model = Sequential([
    InputLayer(input_shape=(X_train_processed.shape[1],)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compile the model.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Stop once validation loss has not improved for 10 epochs and roll back to
# the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train the model; 20% of the training data is held out for validation.
history = model.fit(
    X_train_processed,
    y_train,
    epochs=100,
    validation_split=0.2,
    callbacks=[early_stopping],
    batch_size=32
)

# Evaluate the model on the test set.
test_loss, test_acc = model.evaluate(X_test_processed, y_test, verbose=2)
print(f"Test Accuracy: {test_acc}")