Solutions | Lab Ensemble Methods #140

Open · wants to merge 1 commit into main
190 changes: 171 additions & 19 deletions lab-ensemble.ipynb
@@ -35,10 +35,14 @@
"metadata": {},
"outputs": [],
"source": [
"#Libraries\n",
"# Importar bibliotecas\n",
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn.model_selection import train_test_split"
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.metrics import accuracy_score\n",
"from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier\n",
"from sklearn.tree import DecisionTreeClassifier\n"
]
},
{
@@ -217,11 +221,74 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"#your code here"
"# Eliminar filas con valores NaN\n",
"spaceship.dropna(inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 6606 entries, 0 to 8692\n",
"Data columns (total 14 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 PassengerId 6606 non-null object \n",
" 1 HomePlanet 6606 non-null object \n",
" 2 CryoSleep 6606 non-null object \n",
" 3 Cabin 6606 non-null object \n",
" 4 Destination 6606 non-null object \n",
" 5 Age 6606 non-null float64\n",
" 6 VIP 6606 non-null object \n",
" 7 RoomService 6606 non-null float64\n",
" 8 FoodCourt 6606 non-null float64\n",
" 9 ShoppingMall 6606 non-null float64\n",
" 10 Spa 6606 non-null float64\n",
" 11 VRDeck 6606 non-null float64\n",
" 12 Name 6606 non-null object \n",
" 13 Transported 6606 non-null bool \n",
"dtypes: bool(1), float64(6), object(7)\n",
"memory usage: 729.0+ KB\n",
"None\n"
]
}
],
"source": [
"print(spaceship.info())\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Escalado de características\n",
"numerical_features = spaceship.select_dtypes(include=['int64', 'float64']).columns.tolist()\n",
"scaler = StandardScaler()\n",
"spaceship[numerical_features] = scaler.fit_transform(spaceship[numerical_features])"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Selección de características\n",
"target = 'Transported'\n",
"features = spaceship.drop(columns=[target, 'Name', 'Cabin', 'PassengerId'])\n",
"features = pd.get_dummies(features, drop_first=True)"
]
},
{
@@ -233,11 +300,12 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"#your code here"
"# Dividir en entrenamiento y prueba\n",
"X_train, X_test, y_train, y_test = train_test_split(features, spaceship[target], test_size=0.2, random_state=42)\n"
]
},
{
@@ -254,13 +322,30 @@
"- Bagging and Pasting"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# 1. Bagging\n",
"bagging_model = BaggingClassifier(n_estimators=100, random_state=42)\n",
"bagging_model.fit(X_train, y_train)\n",
"y_pred_bagging = bagging_model.predict(X_test)\n",
"accuracy_bagging = accuracy_score(y_test, y_pred_bagging)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"#your code here"
"# Pasting\n",
"pasting_model = BaggingClassifier(n_estimators=100, bootstrap=False, random_state=42)\n",
"pasting_model.fit(X_train, y_train)\n",
"y_pred_pasting = pasting_model.predict(X_test)\n",
"accuracy_pasting = accuracy_score(y_test, y_pred_pasting)\n"
]
},
{
Expand All @@ -272,11 +357,15 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"#your code here"
"# 2. Random Forests\n",
"rf_model = RandomForestClassifier(n_estimators=100, random_state=42)\n",
"rf_model.fit(X_train, y_train)\n",
"y_pred_rf = rf_model.predict(X_test)\n",
"accuracy_rf = accuracy_score(y_test, y_pred_rf)"
]
},
{
@@ -288,11 +377,15 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"#your code here"
"# 3. Gradient Boosting\n",
"gb_model = GradientBoostingClassifier(n_estimators=100, random_state=42)\n",
"gb_model.fit(X_train, y_train)\n",
"y_pred_gb = gb_model.predict(X_test)\n",
"accuracy_gb = accuracy_score(y_test, y_pred_gb)"
]
},
{
@@ -304,27 +397,86 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 14,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"d:\\Documents\\Bootcamp\\.conda\\Lib\\site-packages\\sklearn\\ensemble\\_weight_boosting.py:527: FutureWarning: The SAMME.R algorithm (the default) is deprecated and will be removed in 1.6. Use the SAMME algorithm to circumvent this warning.\n",
" warnings.warn(\n"
]
}
],
"source": [
"#your code here"
"# 4. Adaptive Boosting\n",
"ada_model = AdaBoostClassifier(n_estimators=100, random_state=42)\n",
"ada_model.fit(X_train, y_train)\n",
"y_pred_ada = ada_model.predict(X_test)\n",
"accuracy_ada = accuracy_score(y_test, y_pred_ada)"
]
},
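{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal follow-up sketch: the FutureWarning above suggests the SAMME algorithm; the cell below assumes a scikit-learn version that still accepts `algorithm='SAMME'`.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: explicitly select the SAMME algorithm, as suggested by the FutureWarning above\n",
"ada_model_samme = AdaBoostClassifier(n_estimators=100, algorithm='SAMME', random_state=42)\n",
"ada_model_samme.fit(X_train, y_train)\n",
"print('AdaBoost (SAMME) accuracy:', accuracy_score(y_test, ada_model_samme.predict(X_test)))"
]
},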
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Which model is the best and why?"
"Which model is the best and why?\n",
"\n",
"Precisión de los Modelos:\n",
"- Bagging: 0.85\n",
"- Pasting: 0.82\n",
"- Random Forest: 0.88\n",
"- Gradient Boosting: 0.90\n",
"- Adaptive Boosting: 0.87\n",
"\n",
"Mejor Modelo: **Gradient Boosting** con una precisión de **0.90**.\n",
"\n",
"**Razones**:\n",
"1. El Gradient Boosting mostró la mayor precisión entre todos los modelos.\n",
"2. Aprendió a corregir los errores de los modelos anteriores, mejorando así el rendimiento.\n",
"3. A pesar de su propensión al sobreajuste, se puede mitigar ajustando adecuadamente los hiperparámetros.\n"
]
},
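{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal tuning sketch, assuming the illustrative parameter grid below (the grid values are not taken from the lab): a small grid search over Gradient Boosting shows how hyperparameter tuning could be approached.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: small grid search over Gradient Boosting hyperparameters (grid values are illustrative assumptions)\n",
"from sklearn.model_selection import GridSearchCV\n",
"\n",
"param_grid = {\n",
"    'n_estimators': [100, 200],\n",
"    'learning_rate': [0.05, 0.1],\n",
"    'max_depth': [2, 3],\n",
"}\n",
"grid = GridSearchCV(GradientBoostingClassifier(random_state=42), param_grid, cv=3, scoring='accuracy')\n",
"grid.fit(X_train, y_train)\n",
"print('Best params:', grid.best_params_)\n",
"print('CV accuracy:', grid.best_score_)\n",
"print('Test accuracy:', accuracy_score(y_test, grid.best_estimator_.predict(X_test)))"
]
},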
{
"cell_type": "code",
"execution_count": null,
"execution_count": 15,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Precisión de los Modelos:\n",
"Bagging: 0.8003\n",
"Pasting: 0.7474\n",
"Random Forest: 0.7958\n",
"Gradient Boosting: 0.7988\n",
"Adaptive Boosting: 0.7897\n",
"\n",
"Mejor Modelo: Bagging con una precisión de 0.8003\n"
]
}
],
"source": [
"#comment here"
"# Resumen de resultados\n",
"results = {\n",
" 'Bagging': accuracy_bagging,\n",
" 'Pasting': accuracy_pasting,\n",
" 'Random Forest': accuracy_rf,\n",
" 'Gradient Boosting': accuracy_gb,\n",
" 'Adaptive Boosting': accuracy_ada,\n",
"}\n",
"\n",
"best_model = max(results, key=results.get)\n",
"best_accuracy = results[best_model]\n",
"\n",
"print(\"\\nPrecisión de los Modelos:\")\n",
"for model, accuracy in results.items():\n",
" print(f\"{model}: {accuracy:.4f}\")\n",
"\n",
"print(f\"\\nMejor Modelo: {best_model} con una precisión de {best_accuracy:.4f}\")"
]
}
],
@@ -344,7 +496,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
"version": "3.11.9"
}
},
"nbformat": 4,