From f3642c4411bfbcfd8227220d081c679430f9548c Mon Sep 17 00:00:00 2001
From: bp117
Date: Fri, 9 Feb 2024 14:44:51 +0530
Subject: [PATCH] Create Nn1

---
 Nn1 | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 90 insertions(+)
 create mode 100644 Nn1

diff --git a/Nn1 b/Nn1
new file mode 100644
index 0000000..19e14d3
--- /dev/null
+++ b/Nn1
@@ -0,0 +1,90 @@
+import pandas as pd
+from scipy import sparse as sp
+from sklearn.model_selection import train_test_split
+from sklearn.compose import ColumnTransformer
+from sklearn.preprocessing import StandardScaler, OneHotEncoder
+from sklearn.impute import SimpleImputer
+from sklearn.pipeline import Pipeline
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import Dense, InputLayer
+from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
+
+# Load the dataset
+file_path = 'path/to/your/dataset.csv'  # Update this path
+df = pd.read_csv(file_path)
+
+# 'DiagPeriodL90D' is the binary target; 'patient_id' is an identifier, not a feature
+X = df.drop(['DiagPeriodL90D', 'patient_id'], axis=1)
+y = df['DiagPeriodL90D']
+
+# Infer which columns are numerical vs. categorical for preprocessing
+numerical_cols = X.select_dtypes(include='number').columns
+categorical_cols = X.select_dtypes(exclude='number').columns
+
+# Split into 60% train, 20% validation, 20% test
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)  # 0.25 * 0.8 = 0.2
+
+# Preprocessing pipelines
+numerical_pipeline = Pipeline([
+    ('imputer', SimpleImputer(strategy='mean')),
+    ('scaler', StandardScaler())
+])
+categorical_pipeline = Pipeline([
+    ('imputer', SimpleImputer(strategy='most_frequent')),
+    ('onehot', OneHotEncoder(handle_unknown='ignore'))
+])
+preprocessor = ColumnTransformer([
+    ('num', numerical_pipeline, numerical_cols),
+    ('cat', categorical_pipeline, categorical_cols)
+])
+
+# Fit the preprocessor on the training split only, then transform all splits
+X_train_processed = preprocessor.fit_transform(X_train)
+X_val_processed = preprocessor.transform(X_val)
+X_test_processed = preprocessor.transform(X_test)
+
+# Densify in case the one-hot encoder returned a sparse matrix
+if sp.issparse(X_train_processed):
+    X_train_processed = X_train_processed.toarray()
+    X_val_processed = X_val_processed.toarray()
+    X_test_processed = X_test_processed.toarray()
+
+# Neural network architecture
+model = Sequential([
+    InputLayer(input_shape=(X_train_processed.shape[1],)),
+    Dense(128, activation='relu'),
+    Dense(64, activation='relu'),
+    Dense(1, activation='sigmoid')
+])
+
+# Compile the model
+model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
+
+# Callbacks: stop when validation loss stalls and keep the best checkpoint
+early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
+model_checkpoint = ModelCheckpoint('best_model.h5', save_best_only=True)
+
+# Train the model
+history = model.fit(
+    X_train_processed, y_train,
+    epochs=100,
+    validation_data=(X_val_processed, y_val),
+    callbacks=[early_stopping, model_checkpoint],
+    batch_size=32
+)
+
+# Load the best checkpoint and evaluate on the held-out test set
+model.load_weights('best_model.h5')
+test_loss, test_acc = model.evaluate(X_test_processed, y_test, verbose=2)
+print(f"Test Accuracy: {test_acc:.4f}")
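+
+# Optional usage sketch: scoring new patients with the fitted pipeline.
+# Assumes a hypothetical 'new_patients.csv' containing the same feature
+# columns as the training data plus 'patient_id'; adjust names as needed.
+new_df = pd.read_csv('new_patients.csv')
+new_X = preprocessor.transform(new_df.drop(columns=['patient_id']))
+if sp.issparse(new_X):
+    new_X = new_X.toarray()
+probs = model.predict(new_X).ravel()  # sigmoid outputs in [0, 1]
+preds = (probs >= 0.5).astype(int)  # hard class labels at the 0.5 threshold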