Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

lab_done #151

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 129 additions & 16 deletions lab-hypothesis-testing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -299,9 +299,36 @@
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"T-Statistic: 3.3349632905124063, P-Value: 0.0015987219490841197\n",
"We reject the null hypothesis. Dragon-type Pokémon have significantly different HP than Grass-type.\n"
]
}
],
"source": [
"#code here"
"import pandas as pd\n",
"import scipy.stats as st\n",
"\n",
"# Fetch the Pokémon table from the course repository\n",
"df = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/pokemon.csv\")\n",
"\n",
"# Challenge 1: compare HP between rows whose 'Type 1' is Dragon and rows whose 'Type 1' is Grass\n",
"type_1 = df['Type 1']\n",
"dragon_hp = df.loc[type_1 == 'Dragon', 'HP']\n",
"grass_hp = df.loc[type_1 == 'Grass', 'HP']\n",
"\n",
"# Two-sample t-test without the equal-variance assumption (equal_var=False)\n",
"t_stat, p_value = st.ttest_ind(dragon_hp, grass_hp, equal_var=False)\n",
"print(f\"T-Statistic: {t_stat}, P-Value: {p_value}\")\n",
"\n",
"# Decision at the 0.05 significance level\n",
"if not p_value < 0.05:\n",
"    print(\"We fail to reject the null hypothesis. No significant difference in HP.\")\n",
"else:\n",
"    print(\"We reject the null hypothesis. Dragon-type Pokémon have significantly different HP than Grass-type.\")\n"
]
},
{
Expand All @@ -313,11 +340,60 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 3,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"HP - T-Statistic: 8.981370483625046, P-Value: 1.0026911708035284e-13\n",
"We reject the null hypothesis for HP. Legendary Pokémon have significantly different HP compared to Non-Legendary.\n",
"Attack - T-Statistic: 10.438133539322203, P-Value: 2.520372449236646e-16\n",
"We reject the null hypothesis for Attack. Legendary Pokémon have significantly different Attack compared to Non-Legendary.\n",
"Defense - T-Statistic: 7.637078164784618, P-Value: 4.826998494919331e-11\n",
"We reject the null hypothesis for Defense. Legendary Pokémon have significantly different Defense compared to Non-Legendary.\n",
"Sp. Atk - T-Statistic: 13.417449984138461, P-Value: 1.5514614112239816e-21\n",
"We reject the null hypothesis for Sp. Atk. Legendary Pokémon have significantly different Sp. Atk compared to Non-Legendary.\n",
"Sp. Def - T-Statistic: 10.015696613114878, P-Value: 2.2949327864052826e-15\n",
"We reject the null hypothesis for Sp. Def. Legendary Pokémon have significantly different Sp. Def compared to Non-Legendary.\n",
"Speed - T-Statistic: 11.47504444631443, P-Value: 1.0490163118824507e-18\n",
"We reject the null hypothesis for Speed. Legendary Pokémon have significantly different Speed compared to Non-Legendary.\n"
]
}
],
"source": [
"#code here"
"import pandas as pd\n",
"import scipy.stats as st\n",
"\n",
"# Load the dataset\n",
"df = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/pokemon.csv\")\n",
"\n",
"# Split data into Legendary and Non-Legendary\n",
"legendary = df[df['Legendary'] == True]\n",
"non_legendary = df[df['Legendary'] == False]\n",
"\n",
"# Define the stats we want to test\n",
"stats = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']\n",
"\n",
"# Bonferroni correction: one t-test is run per stat, so the 0.05 significance level is\n",
"# divided by the number of tests. The threshold does not depend on the stat being tested,\n",
"# so it is computed once, before the loop.\n",
"corrected_alpha = 0.05 / len(stats)\n",
"\n",
"# Perform a Two-Sample T-Test for each stat\n",
"for stat in stats:\n",
"    legendary_stat = legendary[stat]\n",
"    non_legendary_stat = non_legendary[stat]\n",
"\n",
"    # equal_var=False: do not assume the two groups share a variance\n",
"    t_stat, p_value = st.ttest_ind(legendary_stat, non_legendary_stat, equal_var=False)\n",
"\n",
"    # Print results\n",
"    print(f\"{stat} - T-Statistic: {t_stat}, P-Value: {p_value}\")\n",
"\n",
"    # Compare against the corrected threshold, not the raw 0.05\n",
"    if p_value < corrected_alpha:\n",
"        print(f\"We reject the null hypothesis for {stat}. Legendary Pokémon have significantly different {stat} compared to Non-Legendary.\")\n",
"    else:\n",
"        print(f\"We fail to reject the null hypothesis for {stat}. No significant difference in {stat}.\")\n"
]
},
{
Expand Down Expand Up @@ -483,17 +559,54 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"T-Statistic: -17.174167998688404, P-Value: 5.220018561223529e-05\n",
"We reject the null hypothesis. Houses close to schools or hospitals are more expensive.\n"
]
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import scipy.stats as st\n",
"\n",
"# Load the dataset\n",
"df = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/california_housing.csv\")\n",
"\n",
"# Coordinates of the school and hospital\n",
"school_coords = (-118, 37)\n",
"hospital_coords = (-122, 34)\n",
"\n",
"# Define a function to calculate Euclidean distance\n",
"def euclidean_distance(x1, y1, x2, y2):\n",
" return np.sqrt((x2 - x1)**2 + (y2 - y1)**2)\n",
"\n",
"# Calculate distances to school and hospital\n",
"df['dist_school'] = df.apply(lambda row: euclidean_distance(row['longitude'], row['latitude'], school_coords[0], school_coords[1]), axis=1)\n",
"df['dist_hospital'] = df.apply(lambda row: euclidean_distance(row['longitude'], row['latitude'], hospital_coords[0], hospital_coords[1]), axis=1)\n",
"\n",
"# Consider a house 'close' if it's within 0.50 distance to either school or hospital\n",
"df['close'] = np.where((df['dist_school'] < 0.5) | (df['dist_hospital'] < 0.5), 'close', 'far')\n",
"\n",
"# Perform Two-Sample T-Test\n",
"close_prices = df[df['close'] == 'close']['median_house_value']\n",
"far_prices = df[df['close'] == 'far']['median_house_value']\n",
"\n",
"t_stat, p_value = st.ttest_ind(close_prices, far_prices, equal_var=False)\n",
"print(f\"T-Statistic: {t_stat}, P-Value: {p_value}\")\n",
"\n",
"# Check significance at 0.05\n",
"if p_value < 0.05:\n",
" print(\"We reject the null hypothesis. Houses close to schools or hospitals are more expensive.\")\n",
"else:\n",
" print(\"We fail to reject the null hypothesis. No significant difference in house prices.\")\n"
]
}
],
"metadata": {
Expand All @@ -512,7 +625,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
"version": "3.12.6"
}
},
"nbformat": 4,
Expand Down