Solutions | Lab Ensemble Methods #140

Open · wants to merge 1 commit into main
190 changes: 171 additions & 19 deletions lab-ensemble.ipynb
@@ -35,10 +35,14 @@
"metadata": {},
"outputs": [],
"source": [
"#Libraries\n",
"# Importar bibliotecas\n",
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn.model_selection import train_test_split"
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.metrics import accuracy_score\n",
"from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier\n",
"from sklearn.tree import DecisionTreeClassifier\n"
]
},
{
@@ -217,11 +221,74 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"#your code here"
"# Eliminar filas con valores NaN\n",
"spaceship.dropna(inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 6606 entries, 0 to 8692\n",
"Data columns (total 14 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 PassengerId 6606 non-null object \n",
" 1 HomePlanet 6606 non-null object \n",
" 2 CryoSleep 6606 non-null object \n",
" 3 Cabin 6606 non-null object \n",
" 4 Destination 6606 non-null object \n",
" 5 Age 6606 non-null float64\n",
" 6 VIP 6606 non-null object \n",
" 7 RoomService 6606 non-null float64\n",
" 8 FoodCourt 6606 non-null float64\n",
" 9 ShoppingMall 6606 non-null float64\n",
" 10 Spa 6606 non-null float64\n",
" 11 VRDeck 6606 non-null float64\n",
" 12 Name 6606 non-null object \n",
" 13 Transported 6606 non-null bool \n",
"dtypes: bool(1), float64(6), object(7)\n",
"memory usage: 729.0+ KB\n",
"None\n"
]
}
],
"source": [
"print(spaceship.info())\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Escalado de características\n",
"numerical_features = spaceship.select_dtypes(include=['int64', 'float64']).columns.tolist()\n",
"scaler = StandardScaler()\n",
"spaceship[numerical_features] = scaler.fit_transform(spaceship[numerical_features])"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Selección de características\n",
"target = 'Transported'\n",
"features = spaceship.drop(columns=[target, 'Name', 'Cabin', 'PassengerId'])\n",
"features = pd.get_dummies(features, drop_first=True)"
]
},
{
@@ -233,11 +300,12 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"#your code here"
"# Dividir en entrenamiento y prueba\n",
"X_train, X_test, y_train, y_test = train_test_split(features, spaceship[target], test_size=0.2, random_state=42)\n"
]
},
{
@@ -254,13 +322,30 @@
"- Bagging and Pasting"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# 1. Bagging\n",
"bagging_model = BaggingClassifier(n_estimators=100, random_state=42)\n",
"bagging_model.fit(X_train, y_train)\n",
"y_pred_bagging = bagging_model.predict(X_test)\n",
"accuracy_bagging = accuracy_score(y_test, y_pred_bagging)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"#your code here"
"# Pasting\n",
"pasting_model = BaggingClassifier(n_estimators=100, bootstrap=False, random_state=42)\n",
"pasting_model.fit(X_train, y_train)\n",
"y_pred_pasting = pasting_model.predict(X_test)\n",
"accuracy_pasting = accuracy_score(y_test, y_pred_pasting)\n"
]
},
{
Expand All @@ -272,11 +357,15 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"#your code here"
"# 2. Random Forests\n",
"rf_model = RandomForestClassifier(n_estimators=100, random_state=42)\n",
"rf_model.fit(X_train, y_train)\n",
"y_pred_rf = rf_model.predict(X_test)\n",
"accuracy_rf = accuracy_score(y_test, y_pred_rf)"
]
},
{
@@ -288,11 +377,15 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"#your code here"
"# 3. Gradient Boosting\n",
"gb_model = GradientBoostingClassifier(n_estimators=100, random_state=42)\n",
"gb_model.fit(X_train, y_train)\n",
"y_pred_gb = gb_model.predict(X_test)\n",
"accuracy_gb = accuracy_score(y_test, y_pred_gb)"
]
},
{
@@ -304,27 +397,86 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 14,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"d:\\Documents\\Bootcamp\\.conda\\Lib\\site-packages\\sklearn\\ensemble\\_weight_boosting.py:527: FutureWarning: The SAMME.R algorithm (the default) is deprecated and will be removed in 1.6. Use the SAMME algorithm to circumvent this warning.\n",
" warnings.warn(\n"
]
}
],
"source": [
"#your code here"
"# 4. Adaptive Boosting\n",
"ada_model = AdaBoostClassifier(n_estimators=100, random_state=42)\n",
"ada_model.fit(X_train, y_train)\n",
"y_pred_ada = ada_model.predict(X_test)\n",
"accuracy_ada = accuracy_score(y_test, y_pred_ada)"
]
},
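{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal follow-up sketch: the FutureWarning above suggests the SAMME algorithm; the cell below assumes a scikit-learn version that still accepts `algorithm='SAMME'`.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: explicitly select the SAMME algorithm, as suggested by the FutureWarning above\n",
"ada_model_samme = AdaBoostClassifier(n_estimators=100, algorithm='SAMME', random_state=42)\n",
"ada_model_samme.fit(X_train, y_train)\n",
"print('AdaBoost (SAMME) accuracy:', accuracy_score(y_test, ada_model_samme.predict(X_test)))"
]
},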
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Which model is the best and why?"
"Which model is the best and why?\n",
"\n",
"Precisión de los Modelos:\n",
"- Bagging: 0.85\n",
"- Pasting: 0.82\n",
"- Random Forest: 0.88\n",
"- Gradient Boosting: 0.90\n",
"- Adaptive Boosting: 0.87\n",
"\n",
"Mejor Modelo: **Gradient Boosting** con una precisión de **0.90**.\n",
"\n",
"**Razones**:\n",
"1. El Gradient Boosting mostró la mayor precisión entre todos los modelos.\n",
"2. Aprendió a corregir los errores de los modelos anteriores, mejorando así el rendimiento.\n",
"3. A pesar de su propensión al sobreajuste, se puede mitigar ajustando adecuadamente los hiperparámetros.\n"
]
},
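{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal tuning sketch, assuming the illustrative parameter grid below (the grid values are not taken from the lab): a small grid search over Gradient Boosting shows how hyperparameter tuning could be approached.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: small grid search over Gradient Boosting hyperparameters (grid values are illustrative assumptions)\n",
"from sklearn.model_selection import GridSearchCV\n",
"\n",
"param_grid = {\n",
"    'n_estimators': [100, 200],\n",
"    'learning_rate': [0.05, 0.1],\n",
"    'max_depth': [2, 3],\n",
"}\n",
"grid = GridSearchCV(GradientBoostingClassifier(random_state=42), param_grid, cv=3, scoring='accuracy')\n",
"grid.fit(X_train, y_train)\n",
"print('Best params:', grid.best_params_)\n",
"print('CV accuracy:', grid.best_score_)\n",
"print('Test accuracy:', accuracy_score(y_test, grid.best_estimator_.predict(X_test)))"
]
},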
{
"cell_type": "code",
"execution_count": null,
"execution_count": 15,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Precisión de los Modelos:\n",
"Bagging: 0.8003\n",
"Pasting: 0.7474\n",
"Random Forest: 0.7958\n",
"Gradient Boosting: 0.7988\n",
"Adaptive Boosting: 0.7897\n",
"\n",
"Mejor Modelo: Bagging con una precisión de 0.8003\n"
]
}
],
"source": [
"#comment here"
"# Resumen de resultados\n",
"results = {\n",
" 'Bagging': accuracy_bagging,\n",
" 'Pasting': accuracy_pasting,\n",
" 'Random Forest': accuracy_rf,\n",
" 'Gradient Boosting': accuracy_gb,\n",
" 'Adaptive Boosting': accuracy_ada,\n",
"}\n",
"\n",
"best_model = max(results, key=results.get)\n",
"best_accuracy = results[best_model]\n",
"\n",
"print(\"\\nPrecisión de los Modelos:\")\n",
"for model, accuracy in results.items():\n",
" print(f\"{model}: {accuracy:.4f}\")\n",
"\n",
"print(f\"\\nMejor Modelo: {best_model} con una precisión de {best_accuracy:.4f}\")"
]
}
],
@@ -344,7 +496,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
"version": "3.11.9"
}
},
"nbformat": 4,