From b716349e5209393d5fe2e985b4c7f0462d02a199 Mon Sep 17 00:00:00 2001 From: Diego-Llorente Date: Sat, 12 Oct 2024 09:41:49 +0100 Subject: [PATCH] lab complete --- .../lab-hypothesis-testing-checkpoint.ipynb | 1288 +++++++++++++++++ lab-hypothesis-testing.ipynb | 1004 +++++++++++-- 2 files changed, 2174 insertions(+), 118 deletions(-) create mode 100644 .ipynb_checkpoints/lab-hypothesis-testing-checkpoint.ipynb diff --git a/.ipynb_checkpoints/lab-hypothesis-testing-checkpoint.ipynb b/.ipynb_checkpoints/lab-hypothesis-testing-checkpoint.ipynb new file mode 100644 index 0000000..c1fa91c --- /dev/null +++ b/.ipynb_checkpoints/lab-hypothesis-testing-checkpoint.ipynb @@ -0,0 +1,1288 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Lab | Hypothesis Testing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Objective**\n", + "\n", + "Welcome to the Hypothesis Testing Lab, where we embark on an enlightening journey through the realm of statistical decision-making! In this laboratory, we delve into various scenarios, applying the powerful tools of hypothesis testing to scrutinize and interpret data.\n", + "\n", + "From testing the mean of a single sample (One Sample T-Test), to investigating differences between independent groups (Two Sample T-Test), and exploring relationships within dependent samples (Paired Sample T-Test), our exploration knows no bounds. Furthermore, we'll venture into the realm of Analysis of Variance (ANOVA), unraveling the complexities of comparing means across multiple groups.\n", + "\n", + "So, grab your statistical tools, prepare your hypotheses, and let's embark on this fascinating journey of exploration and discovery in the world of hypothesis testing!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Challenge 1**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this challenge, we will be working with pokemon data. The data can be found here:\n", + "\n", + "- https://raw.githubusercontent.com/data-bootcamp-v4/data/main/pokemon.csv" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "#libraries\n", + "import pandas as pd\n", + "import scipy.stats as st\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameType 1Type 2HPAttackDefenseSp. AtkSp. DefSpeedGenerationLegendary
0BulbasaurGrassPoison4549496565451False
1IvysaurGrassPoison6062638080601False
2VenusaurGrassPoison808283100100801False
3Mega VenusaurGrassPoison80100123122120801False
4CharmanderFireNaN3952436050651False
....................................
795DiancieRockFairy50100150100150506True
796Mega DiancieRockFairy501601101601101106True
797Hoopa ConfinedPsychicGhost8011060150130706True
798Hoopa UnboundPsychicDark8016060170130806True
799VolcanionFireWater8011012013090706True
\n", + "

800 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def \\\n", + "0 Bulbasaur Grass Poison 45 49 49 65 65 \n", + "1 Ivysaur Grass Poison 60 62 63 80 80 \n", + "2 Venusaur Grass Poison 80 82 83 100 100 \n", + "3 Mega Venusaur Grass Poison 80 100 123 122 120 \n", + "4 Charmander Fire NaN 39 52 43 60 50 \n", + ".. ... ... ... .. ... ... ... ... \n", + "795 Diancie Rock Fairy 50 100 150 100 150 \n", + "796 Mega Diancie Rock Fairy 50 160 110 160 110 \n", + "797 Hoopa Confined Psychic Ghost 80 110 60 150 130 \n", + "798 Hoopa Unbound Psychic Dark 80 160 60 170 130 \n", + "799 Volcanion Fire Water 80 110 120 130 90 \n", + "\n", + " Speed Generation Legendary \n", + "0 45 1 False \n", + "1 60 1 False \n", + "2 80 1 False \n", + "3 80 1 False \n", + "4 65 1 False \n", + ".. ... ... ... \n", + "795 50 6 True \n", + "796 110 6 True \n", + "797 70 6 True \n", + "798 80 6 True \n", + "799 70 6 True \n", + "\n", + "[800 rows x 11 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/pokemon.csv\")\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- We posit that Pokemons of type Dragon have, on average, more HP stats than Grass. Choose the propper test and, with 5% significance, comment your findings." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "#H0: avg dragon HP <= avg grass HP\n", + "#H1: avg dragon HP > avg grass HP" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "alpha: 0.05, p_value: 0.0002567969150153481\n", + "We can reject the hypothesis with a 95% degree of confidence. THe average HP for dragon pokemons is greater than grass pokemons\n" + ] + } + ], + "source": [ + "#code here\n", + "#I need to use One tailed t-test\n", + "\n", + "alpha = 0.05\n", + "\n", + "grass_hp = df[df[\"Type 1\"] == \"Grass\"][\"HP\"]\n", + "dragon_hp = df[df[\"Type 1\"] == \"Dragon\"][\"HP\"]\n", + "\n", + "t_stat, p_value = st.ttest_ind(dragon_hp, grass_hp, alternative = \"greater\")\n", + "\n", + "print(f\"alpha: {alpha}, p_value: {p_value}\")\n", + "\n", + "if p_value < alpha:\n", + " print(\"We can reject the hypothesis with a 95% degree of confidence. THe average HP for dragon pokemons is greater than grass pokemons\")\n", + "else:\n", + " print(\"We don't have enough data to be able to reject the null hypothesis.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "67.27142857142857" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "grass_hp.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "83.3125" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dragon_hp.mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- We posit that Legendary Pokemons have different stats (HP, Attack, Defense, Sp.Atk, Sp.Def, Speed) when comparing with Non-Legendary. 
Choose the proper test and, with 5% significance, comment on your findings.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#H0: legendary stats = non-legendary stats\n",
+ "#H1: legendary stats != non-legendary stats"
+ ]
+ },
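+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Editor's sketch, not part of the original lab: st.ttest_ind pools the two\n",
+ "# group variances by default, and running six tests at alpha = 0.05 inflates\n",
+ "# the family-wise error rate. A hedged alternative is Welch's t-test\n",
+ "# (equal_var=False) combined with a Bonferroni-corrected threshold. This\n",
+ "# assumes only df as loaded in the cell above.\n",
+ "legendary = df[df[\"Legendary\"] == True]\n",
+ "not_legendary = df[df[\"Legendary\"] == False]\n",
+ "bonferroni_alpha = 0.05 / 6  # one shared threshold across the six stats tested\n",
+ "for stat in [\"HP\", \"Attack\", \"Defense\", \"Sp. Atk\", \"Sp. Def\", \"Speed\"]:\n",
+ "    t_stat, p_value = st.ttest_ind(legendary[stat], not_legendary[stat], equal_var=False)\n",
+ "    print(f\"{stat}: Welch t = {t_stat:.2f}, p = {p_value:.2e}, significant at corrected alpha: {p_value < bonferroni_alpha}\")"
+ ]
+ },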
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameType 1Type 2HPAttackDefenseSp. AtkSp. DefSpeedGenerationLegendary
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [Name, Type 1, Type 2, HP, Attack, Defense, Sp. Atk, Sp. Def, Speed, Generation, Legendary]\n", + "Index: []" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(0)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "For HP, t_stat: 8.036124405043928, p_value: 3.330647684846191e-15\n", + "We reject the null hypothesis. We can say, with 95% degree of confidence that there is a significant difference between legendary and non legeendary in HP\n", + "For Attack, t_stat: 10.397321023700622, p_value: 7.827253003205333e-24\n", + "We reject the null hypothesis. We can say, with 95% degree of confidence that there is a significant difference between legendary and non legeendary in Attack\n", + "For Defense, t_stat: 7.181240122992339, p_value: 1.5842226094427255e-12\n", + "We reject the null hypothesis. We can say, with 95% degree of confidence that there is a significant difference between legendary and non legeendary in Defense\n", + "For Sp. Atk, t_stat: 14.191406210846289, p_value: 6.314915770427266e-41\n", + "We reject the null hypothesis. We can say, with 95% degree of confidence that there is a significant difference between legendary and non legeendary in Sp. Atk\n", + "For Sp. Def, t_stat: 11.03775106120522, p_value: 1.8439809580409594e-26\n", + "We reject the null hypothesis. We can say, with 95% degree of confidence that there is a significant difference between legendary and non legeendary in Sp. Def\n", + "For Speed, t_stat: 9.765234331931898, p_value: 2.3540754436898437e-21\n", + "We reject the null hypothesis. We can say, with 95% degree of confidence that there is a significant difference between legendary and non legeendary in Speed\n" + ] + } + ], + "source": [ + "#code here\n", + "#Two_sample-t-Test\n", + "#WE're going to compare lengendary and non-legendary stas one by one to test if the are equal.\n", + "\n", + "alpha = 0.05\n", + "\n", + "stats = [\"HP\", \"Attack\", \"Defense\", \"Sp. Atk\", \"Sp. Def\", \"Speed\"]\n", + "\n", + "legendary = df[df[\"Legendary\"] == True]\n", + "\n", + "not_legendary = df[df[\"Legendary\"] == False]\n", + "\n", + "for stat in stats:\n", + " t_stat, p_value = st.ttest_ind(legendary[stat], not_legendary[stat])\n", + " print(f\"For {stat}, t_stat: {t_stat}, p_value: {p_value}\")\n", + " \n", + " if p_value < alpha:\n", + " print(f\"We reject the null hypothesis. We can say, with 95% degree of confidence that there is a significant difference between legendary and non legeendary in {stat}\")\n", + " else:\n", + " print(f\"We don't have enough data to reject the null hypothesis for {stat}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "stats = [\"HP\", \"Attack\", \"Defense\", \"Sp. Atk\", \"Sp. 
Def\", \"Speed\"]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "92.73846153846154" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "legendary[\"HP\"].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "67.18231292517007" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "not_legendary[\"HP\"].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "116.67692307692307" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "legendary[\"Attack\"].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "75.66938775510204" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "not_legendary[\"Attack\"].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "99.66153846153846" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "legendary[\"Defense\"].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "71.55918367346939" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "not_legendary[\"Defense\"].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "122.18461538461538" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "legendary[\"Sp. Atk\"].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "68.45442176870748" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "not_legendary[\"Sp. Atk\"].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "105.93846153846154" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "legendary[\"Sp. Def\"].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "68.89251700680272" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "not_legendary[\"Sp. 
Def\"].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "100.18461538461538" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "legendary[\"Speed\"].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "65.45578231292517" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "not_legendary[\"Speed\"].mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Challenge 2**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this challenge, we will be working with california-housing data. The data can be found here:\n", + "- https://raw.githubusercontent.com/data-bootcamp-v4/data/main/california_housing.csv" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
longitudelatitudehousing_median_agetotal_roomstotal_bedroomspopulationhouseholdsmedian_incomemedian_house_value
0-114.3134.1915.05612.01283.01015.0472.01.493666900.0
1-114.4734.4019.07650.01901.01129.0463.01.820080100.0
2-114.5633.6917.0720.0174.0333.0117.01.650985700.0
3-114.5733.6414.01501.0337.0515.0226.03.191773400.0
4-114.5733.5720.01454.0326.0624.0262.01.925065500.0
5-114.5833.6329.01387.0236.0671.0239.03.343874000.0
6-114.5833.6125.02907.0680.01841.0633.02.676882400.0
7-114.5934.8341.0812.0168.0375.0158.01.708348500.0
8-114.5933.6134.04789.01175.03134.01056.02.178258400.0
9-114.6034.8346.01497.0309.0787.0271.02.190848100.0
\n", + "
" + ], + "text/plain": [ + " longitude latitude housing_median_age total_rooms total_bedrooms \\\n", + "0 -114.31 34.19 15.0 5612.0 1283.0 \n", + "1 -114.47 34.40 19.0 7650.0 1901.0 \n", + "2 -114.56 33.69 17.0 720.0 174.0 \n", + "3 -114.57 33.64 14.0 1501.0 337.0 \n", + "4 -114.57 33.57 20.0 1454.0 326.0 \n", + "5 -114.58 33.63 29.0 1387.0 236.0 \n", + "6 -114.58 33.61 25.0 2907.0 680.0 \n", + "7 -114.59 34.83 41.0 812.0 168.0 \n", + "8 -114.59 33.61 34.0 4789.0 1175.0 \n", + "9 -114.60 34.83 46.0 1497.0 309.0 \n", + "\n", + " population households median_income median_house_value \n", + "0 1015.0 472.0 1.4936 66900.0 \n", + "1 1129.0 463.0 1.8200 80100.0 \n", + "2 333.0 117.0 1.6509 85700.0 \n", + "3 515.0 226.0 3.1917 73400.0 \n", + "4 624.0 262.0 1.9250 65500.0 \n", + "5 671.0 239.0 3.3438 74000.0 \n", + "6 1841.0 633.0 2.6768 82400.0 \n", + "7 375.0 158.0 1.7083 48500.0 \n", + "8 3134.0 1056.0 2.1782 58400.0 \n", + "9 787.0 271.0 2.1908 48100.0 " + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/california_housing.csv\")\n", + "df.head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**We posit that houses close to either a school or a hospital are more expensive.**\n", + "\n", + "- School coordinates (-118, 37)\n", + "- Hospital coordinates (-122, 34)\n", + "\n", + "We consider a house (neighborhood) to be close to a school or hospital if the distance is lower than 0.50.\n", + "\n", + "Hint:\n", + "- Write a function to calculate euclidean distance from each house (neighborhood) to the school and to the hospital.\n", + "- Divide your dataset into houses close and far from either a hospital or school.\n", + "- Choose the propper test and, with 5% significance, comment your findings.\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "#Hypothesis:\n", + "#H0: prices of houses near schools or hospitals = houses far from schools or hospitals \n", + "#H1: prices of houses near schools or hospitals != houses far from schools or hospitals" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "#Euclidean distance\n", + "def euc_dist(x1, x2, y1, y2):\n", + " return np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "#Dividing the dataset into houses close and far from EITHER a hospital OR school\n", + "\n", + "school = (-118, 37)\n", + "hospital = (-122, 34)\n", + "\n", + "#distance from school\n", + "df[\"dist_school\"] = df.apply(lambda row: euc_dist(row[\"longitude\"], school[0], row[\"latitude\"], school[1]), axis = 1)\n", + "\n", + "df[\"dist_hospital\"] = df.apply(lambda row: euc_dist(row[\"longitude\"], hospital[0], row[\"latitude\"], hospital[1]), axis = 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
longitudelatitudehousing_median_agetotal_roomstotal_bedroomspopulationhouseholdsmedian_incomemedian_house_valuedist_schooldist_hospital
0-114.3134.1915.05612.01283.01015.0472.01.493666900.04.6381257.692347
1-114.4734.4019.07650.01901.01129.0463.01.820080100.04.3841657.540617
2-114.5633.6917.0720.0174.0333.0117.01.650985700.04.7738567.446456
\n", + "
" + ], + "text/plain": [ + " longitude latitude housing_median_age total_rooms total_bedrooms \\\n", + "0 -114.31 34.19 15.0 5612.0 1283.0 \n", + "1 -114.47 34.40 19.0 7650.0 1901.0 \n", + "2 -114.56 33.69 17.0 720.0 174.0 \n", + "\n", + " population households median_income median_house_value dist_school \\\n", + "0 1015.0 472.0 1.4936 66900.0 4.638125 \n", + "1 1129.0 463.0 1.8200 80100.0 4.384165 \n", + "2 333.0 117.0 1.6509 85700.0 4.773856 \n", + "\n", + " dist_hospital \n", + "0 7.692347 \n", + "1 7.540617 \n", + "2 7.446456 " + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "longitude float64\n", + "latitude float64\n", + "housing_median_age float64\n", + "total_rooms float64\n", + "total_bedrooms float64\n", + "population float64\n", + "households float64\n", + "median_income float64\n", + "median_house_value float64\n", + "dist_school float64\n", + "dist_hospital float64\n", + "dtype: object" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "df[\"close_to_school_hospital\"] = ((df[\"dist_school\"] < 0.5) | (df[\"dist_hospital\"] < 0.5))" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "close_to_school_hospital\n", + "False 16995\n", + "True 5\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"close_to_school_hospital\"].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "#We need to be cautios because we only have 5 datapoints which are near to a hospital or a school." + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "alpha = 0.05\n", + "\n", + "close_price = df[df[\"close_to_school_hospital\"] == True][\"median_house_value\"]\n", + "\n", + "far_price = df[df[\"close_to_school_hospital\"] == False][\"median_house_value\"]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.026799733071128685" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t_stat, p_value = st.ttest_ind(close_price, far_price)\n", + "p_value" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "We reject the null hypothesis. We can say, with 95% degree of confidence that there is a significant \n", + " difference between house prices of those near and those far fromm schools or hospitals\n" + ] + } + ], + "source": [ + "if p_value < alpha:\n", + " print(f'''We reject the null hypothesis. 
We can say, with 95% degree of confidence that there is a significant \n", + " difference between house prices of those near and those far fromm schools or hospitals''')\n", + "else: \n", + " print(f\"We don't have enough data to reject the null hypothesis.\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/lab-hypothesis-testing.ipynb b/lab-hypothesis-testing.ipynb index 18ad6d5..c1fa91c 100644 --- a/lab-hypothesis-testing.ipynb +++ b/lab-hypothesis-testing.ipynb @@ -45,13 +45,12 @@ "#libraries\n", "import pandas as pd\n", "import scipy.stats as st\n", - "import numpy as np\n", - "\n" + "import numpy as np" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -278,7 +277,7 @@ "[800 rows x 11 columns]" ] }, - "execution_count": 3, + "execution_count": 2, "metadata": {}, "output_type": "execute_result" } @@ -297,47 +296,107 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ - "#code here" + "#H0: avg dragon HP <= avg grass HP\n", + "#H1: avg dragon HP > avg grass HP" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 4, "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "alpha: 0.05, p_value: 0.0002567969150153481\n", + "We can reject the hypothesis with a 95% degree of confidence. THe average HP for dragon pokemons is greater than grass pokemons\n" + ] + } + ], "source": [ - "- We posit that Legendary Pokemons have different stats (HP, Attack, Defense, Sp.Atk, Sp.Def, Speed) when comparing with Non-Legendary. Choose the propper test and, with 5% significance, comment your findings.\n" + "#code here\n", + "#I need to use One tailed t-test\n", + "\n", + "alpha = 0.05\n", + "\n", + "grass_hp = df[df[\"Type 1\"] == \"Grass\"][\"HP\"]\n", + "dragon_hp = df[df[\"Type 1\"] == \"Dragon\"][\"HP\"]\n", + "\n", + "t_stat, p_value = st.ttest_ind(dragon_hp, grass_hp, alternative = \"greater\")\n", + "\n", + "print(f\"alpha: {alpha}, p_value: {p_value}\")\n", + "\n", + "if p_value < alpha:\n", + " print(\"We can reject the hypothesis with a 95% degree of confidence. THe average HP for dragon pokemons is greater than grass pokemons\")\n", + "else:\n", + " print(\"We don't have enough data to be able to reject the null hypothesis.\")" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "67.27142857142857" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "#code here" + "grass_hp.mean()" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 6, "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "83.3125" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "**Challenge 2**" + "dragon_hp.mean()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "In this challenge, we will be working with california-housing data. 
The data can be found here:\n", - "- https://raw.githubusercontent.com/data-bootcamp-v4/data/main/california_housing.csv" + "- We posit that Legendary Pokemons have different stats (HP, Attack, Defense, Sp.Atk, Sp.Def, Speed) when comparing with Non-Legendary. Choose the propper test and, with 5% significance, comment your findings.\n" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "#H0: legendary stats = non-legendary stats\n", + "#H1: legendary stats != non-legendary stats" + ] + }, + { + "cell_type": "code", + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -361,144 +420,853 @@ " \n", " \n", " \n", - " longitude\n", - " latitude\n", - " housing_median_age\n", - " total_rooms\n", - " total_bedrooms\n", - " population\n", - " households\n", - " median_income\n", - " median_house_value\n", + " Name\n", + " Type 1\n", + " Type 2\n", + " HP\n", + " Attack\n", + " Defense\n", + " Sp. Atk\n", + " Sp. Def\n", + " Speed\n", + " Generation\n", + " Legendary\n", " \n", " \n", " \n", - " \n", - " 0\n", - " -114.31\n", - " 34.19\n", - " 15.0\n", - " 5612.0\n", - " 1283.0\n", - " 1015.0\n", - " 472.0\n", - " 1.4936\n", - " 66900.0\n", - " \n", - " \n", - " 1\n", - " -114.47\n", - " 34.40\n", - " 19.0\n", - " 7650.0\n", - " 1901.0\n", - " 1129.0\n", - " 463.0\n", - " 1.8200\n", - " 80100.0\n", - " \n", - " \n", - " 2\n", - " -114.56\n", - " 33.69\n", - " 17.0\n", - " 720.0\n", - " 174.0\n", - " 333.0\n", - " 117.0\n", - " 1.6509\n", - " 85700.0\n", - " \n", - " \n", - " 3\n", - " -114.57\n", - " 33.64\n", - " 14.0\n", - " 1501.0\n", - " 337.0\n", - " 515.0\n", - " 226.0\n", - " 3.1917\n", - " 73400.0\n", - " \n", - " \n", - " 4\n", - " -114.57\n", - " 33.57\n", - " 20.0\n", - " 1454.0\n", - " 326.0\n", - " 624.0\n", - " 262.0\n", - " 1.9250\n", - " 65500.0\n", - " \n", " \n", "\n", "" ], "text/plain": [ - " longitude latitude housing_median_age total_rooms total_bedrooms \\\n", - "0 -114.31 34.19 15.0 5612.0 1283.0 \n", - "1 -114.47 34.40 19.0 7650.0 1901.0 \n", - "2 -114.56 33.69 17.0 720.0 174.0 \n", - "3 -114.57 33.64 14.0 1501.0 337.0 \n", - "4 -114.57 33.57 20.0 1454.0 326.0 \n", - "\n", - " population households median_income median_house_value \n", - "0 1015.0 472.0 1.4936 66900.0 \n", - "1 1129.0 463.0 1.8200 80100.0 \n", - "2 333.0 117.0 1.6509 85700.0 \n", - "3 515.0 226.0 3.1917 73400.0 \n", - "4 624.0 262.0 1.9250 65500.0 " + "Empty DataFrame\n", + "Columns: [Name, Type 1, Type 2, HP, Attack, Defense, Sp. Atk, Sp. Def, Speed, Generation, Legendary]\n", + "Index: []" ] }, - "execution_count": 5, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/california_housing.csv\")\n", - "df.head()" + "df.head(0)" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 9, "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "For HP, t_stat: 8.036124405043928, p_value: 3.330647684846191e-15\n", + "We reject the null hypothesis. We can say, with 95% degree of confidence that there is a significant difference between legendary and non legeendary in HP\n", + "For Attack, t_stat: 10.397321023700622, p_value: 7.827253003205333e-24\n", + "We reject the null hypothesis. 
We can say, with a 95% degree of confidence, that there is a significant difference between legendary and non-legendary in Attack\n",
+ "For Defense, t_stat: 7.181240122992339, p_value: 1.5842226094427255e-12\n",
+ "We reject the null hypothesis. We can say, with a 95% degree of confidence, that there is a significant difference between legendary and non-legendary in Defense\n",
+ "For Sp. Atk, t_stat: 14.191406210846289, p_value: 6.314915770427266e-41\n",
+ "We reject the null hypothesis. We can say, with a 95% degree of confidence, that there is a significant difference between legendary and non-legendary in Sp. Atk\n",
+ "For Sp. Def, t_stat: 11.03775106120522, p_value: 1.8439809580409594e-26\n",
+ "We reject the null hypothesis. We can say, with a 95% degree of confidence, that there is a significant difference between legendary and non-legendary in Sp. Def\n",
+ "For Speed, t_stat: 9.765234331931898, p_value: 2.3540754436898437e-21\n",
+ "We reject the null hypothesis. We can say, with a 95% degree of confidence, that there is a significant difference between legendary and non-legendary in Speed\n"
+ ]
+ }
+ ],
+ "source": [
+ "#code here\n",
+ "# Two-sample t-test\n",
+ "# We're going to compare legendary and non-legendary stats one by one to test whether they are equal.\n",
+ "\n",
+ "alpha = 0.05\n",
+ "\n",
+ "stats = [\"HP\", \"Attack\", \"Defense\", \"Sp. Atk\", \"Sp. Def\", \"Speed\"]\n",
+ "\n",
+ "legendary = df[df[\"Legendary\"] == True]\n",
+ "\n",
+ "not_legendary = df[df[\"Legendary\"] == False]\n",
+ "\n",
+ "for stat in stats:\n",
+ "    t_stat, p_value = st.ttest_ind(legendary[stat], not_legendary[stat])\n",
+ "    print(f\"For {stat}, t_stat: {t_stat}, p_value: {p_value}\")\n",
+ "    \n",
+ "    if p_value < alpha:\n",
+ "        print(f\"We reject the null hypothesis. We can say, with a 95% degree of confidence, that there is a significant difference between legendary and non-legendary in {stat}\")\n",
+ "    else:\n",
+ "        print(f\"We don't have enough evidence to reject the null hypothesis for {stat}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "stats = [\"HP\", \"Attack\", \"Defense\", \"Sp. Atk\", \"Sp. 
Def\", \"Speed\"]\n" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/plain": [ + "92.73846153846154" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "legendary[\"HP\"].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "67.18231292517007" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "not_legendary[\"HP\"].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "116.67692307692307" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "legendary[\"Attack\"].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "75.66938775510204" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "not_legendary[\"Attack\"].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "99.66153846153846" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "legendary[\"Defense\"].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "71.55918367346939" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "not_legendary[\"Defense\"].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "122.18461538461538" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "legendary[\"Sp. Atk\"].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "68.45442176870748" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "not_legendary[\"Sp. Atk\"].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "105.93846153846154" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "legendary[\"Sp. Def\"].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "68.89251700680272" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "not_legendary[\"Sp. 
Def\"].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "100.18461538461538" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "legendary[\"Speed\"].mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "65.45578231292517" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "not_legendary[\"Speed\"].mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Challenge 2**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this challenge, we will be working with california-housing data. The data can be found here:\n", + "- https://raw.githubusercontent.com/data-bootcamp-v4/data/main/california_housing.csv" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
longitudelatitudehousing_median_agetotal_roomstotal_bedroomspopulationhouseholdsmedian_incomemedian_house_value
0-114.3134.1915.05612.01283.01015.0472.01.493666900.0
1-114.4734.4019.07650.01901.01129.0463.01.820080100.0
2-114.5633.6917.0720.0174.0333.0117.01.650985700.0
3-114.5733.6414.01501.0337.0515.0226.03.191773400.0
4-114.5733.5720.01454.0326.0624.0262.01.925065500.0
5-114.5833.6329.01387.0236.0671.0239.03.343874000.0
6-114.5833.6125.02907.0680.01841.0633.02.676882400.0
7-114.5934.8341.0812.0168.0375.0158.01.708348500.0
8-114.5933.6134.04789.01175.03134.01056.02.178258400.0
9-114.6034.8346.01497.0309.0787.0271.02.190848100.0
\n", + "
" + ], + "text/plain": [ + " longitude latitude housing_median_age total_rooms total_bedrooms \\\n", + "0 -114.31 34.19 15.0 5612.0 1283.0 \n", + "1 -114.47 34.40 19.0 7650.0 1901.0 \n", + "2 -114.56 33.69 17.0 720.0 174.0 \n", + "3 -114.57 33.64 14.0 1501.0 337.0 \n", + "4 -114.57 33.57 20.0 1454.0 326.0 \n", + "5 -114.58 33.63 29.0 1387.0 236.0 \n", + "6 -114.58 33.61 25.0 2907.0 680.0 \n", + "7 -114.59 34.83 41.0 812.0 168.0 \n", + "8 -114.59 33.61 34.0 4789.0 1175.0 \n", + "9 -114.60 34.83 46.0 1497.0 309.0 \n", + "\n", + " population households median_income median_house_value \n", + "0 1015.0 472.0 1.4936 66900.0 \n", + "1 1129.0 463.0 1.8200 80100.0 \n", + "2 333.0 117.0 1.6509 85700.0 \n", + "3 515.0 226.0 3.1917 73400.0 \n", + "4 624.0 262.0 1.9250 65500.0 \n", + "5 671.0 239.0 3.3438 74000.0 \n", + "6 1841.0 633.0 2.6768 82400.0 \n", + "7 375.0 158.0 1.7083 48500.0 \n", + "8 3134.0 1056.0 2.1782 58400.0 \n", + "9 787.0 271.0 2.1908 48100.0 " + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/california_housing.csv\")\n", + "df.head(10)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**We posit that houses close to either a school or a hospital are more expensive.**\n", + "\n", + "- School coordinates (-118, 37)\n", + "- Hospital coordinates (-122, 34)\n", + "\n", + "We consider a house (neighborhood) to be close to a school or hospital if the distance is lower than 0.50.\n", + "\n", + "Hint:\n", + "- Write a function to calculate euclidean distance from each house (neighborhood) to the school and to the hospital.\n", + "- Divide your dataset into houses close and far from either a hospital or school.\n", + "- Choose the propper test and, with 5% significance, comment your findings.\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "#Hypothesis:\n", + "#H0: prices of houses near schools or hospitals = houses far from schools or hospitals \n", + "#H1: prices of houses near schools or hospitals != houses far from schools or hospitals" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "#Euclidean distance\n", + "def euc_dist(x1, x2, y1, y2):\n", + " return np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "#Dividing the dataset into houses close and far from EITHER a hospital OR school\n", + "\n", + "school = (-118, 37)\n", + "hospital = (-122, 34)\n", + "\n", + "#distance from school\n", + "df[\"dist_school\"] = df.apply(lambda row: euc_dist(row[\"longitude\"], school[0], row[\"latitude\"], school[1]), axis = 1)\n", + "\n", + "df[\"dist_hospital\"] = df.apply(lambda row: euc_dist(row[\"longitude\"], hospital[0], row[\"latitude\"], hospital[1]), axis = 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
longitudelatitudehousing_median_agetotal_roomstotal_bedroomspopulationhouseholdsmedian_incomemedian_house_valuedist_schooldist_hospital
0-114.3134.1915.05612.01283.01015.0472.01.493666900.04.6381257.692347
1-114.4734.4019.07650.01901.01129.0463.01.820080100.04.3841657.540617
2-114.5633.6917.0720.0174.0333.0117.01.650985700.04.7738567.446456
\n", + "
" + ], + "text/plain": [ + " longitude latitude housing_median_age total_rooms total_bedrooms \\\n", + "0 -114.31 34.19 15.0 5612.0 1283.0 \n", + "1 -114.47 34.40 19.0 7650.0 1901.0 \n", + "2 -114.56 33.69 17.0 720.0 174.0 \n", + "\n", + " population households median_income median_house_value dist_school \\\n", + "0 1015.0 472.0 1.4936 66900.0 4.638125 \n", + "1 1129.0 463.0 1.8200 80100.0 4.384165 \n", + "2 333.0 117.0 1.6509 85700.0 4.773856 \n", + "\n", + " dist_hospital \n", + "0 7.692347 \n", + "1 7.540617 \n", + "2 7.446456 " + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head(3)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "longitude float64\n", + "latitude float64\n", + "housing_median_age float64\n", + "total_rooms float64\n", + "total_bedrooms float64\n", + "population float64\n", + "households float64\n", + "median_income float64\n", + "median_house_value float64\n", + "dist_school float64\n", + "dist_hospital float64\n", + "dtype: object" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "df[\"close_to_school_hospital\"] = ((df[\"dist_school\"] < 0.5) | (df[\"dist_hospital\"] < 0.5))" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "close_to_school_hospital\n", + "False 16995\n", + "True 5\n", + "Name: count, dtype: int64" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df[\"close_to_school_hospital\"].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "#We need to be cautios because we only have 5 datapoints which are near to a hospital or a school." + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [], + "source": [ + "alpha = 0.05\n", + "\n", + "close_price = df[df[\"close_to_school_hospital\"] == True][\"median_house_value\"]\n", + "\n", + "far_price = df[df[\"close_to_school_hospital\"] == False][\"median_house_value\"]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.026799733071128685" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "t_stat, p_value = st.ttest_ind(close_price, far_price)\n", + "p_value" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "We reject the null hypothesis. We can say, with 95% degree of confidence that there is a significant \n", + " difference between house prices of those near and those far fromm schools or hospitals\n" + ] + } + ], + "source": [ + "if p_value < alpha:\n", + " print(f'''We reject the null hypothesis. 
We can say, with a 95% degree of confidence, that there is a significant \n",
+ "    difference between house prices of those near and those far from schools or hospitals''')\n",
+ "else: \n",
+ "    print(f\"We don't have enough evidence to reject the null hypothesis.\")"
+ ]
+ }
 ],
 "metadata": {
 "kernelspec": {
- "display_name": "Python 3",
+ "display_name": "Python 3 (ipykernel)",
 "language": "python",
 "name": "python3"
 },
 "language_info": {
@@ -512,7 +1280,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
- "version": "3.10.9"
+ "version": "3.11.7"
 }
 },
 "nbformat": 4,
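
Reviewer note (appended by the editor, not part of the patch itself): the
close-vs-far comparison in Challenge 2 rests on only 5 "close" tracts against
16,995 "far" ones, and the stated claim ("more expensive") is directional,
while the test that was run is two-sided. A hedged alternative sketch, reusing
close_price and far_price exactly as built in the notebook:

    import scipy.stats as st

    # One-sided Mann-Whitney U test: rank-based, so it assumes neither
    # normality nor equal variances, which matters with only 5 observations
    # in one group.
    u_stat, p_value = st.mannwhitneyu(close_price, far_price, alternative="greater")
    print(f"U = {u_stat}, one-sided p = {p_value:.4f}")

With n = 5 any p-value is fragile; widening the 0.50-degree radius, or simply
reporting the two group means with a caveat, may be more informative than the
choice of test alone.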