Create Nn1

bp117 · Feb 9, 2024 · f3642c4 · f3642c4
1 parent e7ce037
commit f3642c4
Showing 1 changed file with 70 additions and 0 deletions.
diff --git a/Nn1 b/Nn1
@@ -0,0 +1,70 @@
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.compose import ColumnTransformer
+from sklearn.preprocessing import StandardScaler, OneHotEncoder
+from sklearn.impute import SimpleImputer
+from sklearn.pipeline import Pipeline
+import tensorflow as tf
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Dense, InputLayer
+from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
+
+# Load your dataset
+file_path = 'path/to/your/dataset.csv'  # Update this path
+df = pd.read_csv(file_path)
+
+# Assuming 'DiagPeriodL90D' is the target variable and 'patient_id' is a non-feature column
+X = df.drop(['DiagPeriodL90D', 'patient_id'], axis=1)
+y = df['DiagPeriodL90D']
+
+# Splitting the dataset into training, validation, and test sets
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)  # 0.25 * 0.8 = 0.2
+
+# Preprocessing pipelines
+numerical_pipeline = Pipeline([
+    ('imputer', SimpleImputer(strategy='mean')),
+    ('scaler', StandardScaler())
+])
+categorical_pipeline = Pipeline([
+    ('imputer', SimpleImputer(strategy='most_frequent')),
+    ('onehot', OneHotEncoder(handle_unknown='ignore'))
+])
+preprocessor = ColumnTransformer([
+    ('num', numerical_pipeline, numerical_cols),
+    ('cat', categorical_pipeline, categorical_cols)
+])
+
+# Apply preprocessing
+X_train_processed = preprocessor.fit_transform(X_train)
+X_val_processed = preprocessor.transform(X_val)
+X_test_processed = preprocessor.transform(X_test)
+
+# Neural network architecture
+model = Sequential([
+    InputLayer(input_shape=(X_train_processed.shape[1],)),
+    Dense(128, activation='relu'),
+    Dense(64, activation='relu'),
+    Dense(1, activation='sigmoid')
+])
+
+# Compile the model
+model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
+
+# Callbacks
+early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
+model_checkpoint = ModelCheckpoint('best_model.h5', save_best_only=True)
+
+# Train the model
+history = model.fit(
+    X_train_processed, y_train, 
+    epochs=100, 
+    validation_data=(X_val_processed, y_val),
+    callbacks=[early_stopping, model_checkpoint], 
+    batch_size=32
+)
+
+# Load the best model and evaluate on the test set
+model.load_weights('best_model.h5')
+test_loss, test_acc = model.evaluate(X_test_processed, y_test, verbose=2)
+print(f"Test Accuracy: {test_acc}")