Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 129 additions & 16 deletions lab-hypothesis-testing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -51,7 +51,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 15,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -278,7 +278,7 @@
"[800 rows x 11 columns]"
]
},
"execution_count": 3,
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -297,11 +297,40 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 16,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Mean(Dragon HP): 83.31 | Mean(Other HP): 68.67\n",
"Welch t-statistic: 3.400\n",
"One-sided p-value (Dragon > Others): 0.0008676\n",
"Conclusion: Reject H0: Dragons have higher average HP.\n"
]
}
],
"source": [
"#code here"
"dragons = df[df[\"Type 1\"] == \"Dragon\"][\"HP\"].dropna()\n",
"others = df[df[\"Type 1\"] != \"Dragon\"][\"HP\"].dropna()\n",
"\n",
"t_stat, p_two_sided = st.ttest_ind(dragons, others, equal_var=False, nan_policy=\"omit\")\n",
"\n",
"# Convert two-sided p to one-sided for 'greater' alternative:\n",
"# If mean(dragon) > mean(others), p_one_sided = p_two_sided / 2; else 1 - p_two_sided / 2\n",
"mean_diff = dragons.mean() - others.mean()\n",
"if mean_diff > 0:\n",
" p_one_sided = p_two_sided / 2\n",
"else:\n",
" p_one_sided = 1 - (p_two_sided / 2)\n",
"\n",
"alpha = 0.05\n",
"print(f\"Mean(Dragon HP): {dragons.mean():.2f} | Mean(Other HP): {others.mean():.2f}\")\n",
"print(f\"Welch t-statistic: {t_stat:.3f}\")\n",
"print(f\"One-sided p-value (Dragon > Others): {p_one_sided:.4g}\")\n",
"print(\"Conclusion:\", \"Reject H0: Dragons have higher average HP.\"\n",
" if p_one_sided < alpha else \"Fail to reject H0: Insufficient evidence that Dragons have higher HP.\")\n"
]
},
{
Expand All @@ -313,11 +342,47 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 17,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" stat mean_legendary mean_nonlegendary t_stat p_value significant_0.05\n",
" HP 92.738462 67.182313 8.981370 1.002691e-13 True\n",
" Attack 116.676923 75.669388 10.438134 2.520372e-16 True\n",
"Defense 99.661538 71.559184 7.637078 4.826998e-11 True\n",
"Sp. Atk 122.184615 68.454422 13.417450 1.551461e-21 True\n",
"Sp. Def 105.938462 68.892517 10.015697 2.294933e-15 True\n",
" Speed 100.184615 65.455782 11.475044 1.049016e-18 True\n"
]
}
],
"source": [
"#code here"
"stats_cols = [\"HP\", \"Attack\", \"Defense\", \"Sp. Atk\", \"Sp. Def\", \"Speed\"]\n",
"\n",
"leg = df[df[\"Legendary\"] == True]\n",
"non = df[df[\"Legendary\"] == False]\n",
"\n",
"results = []\n",
"for col in stats_cols:\n",
" x = leg[col].dropna()\n",
" y = non[col].dropna()\n",
" t, p = st.ttest_ind(x, y, equal_var=False, nan_policy=\"omit\")\n",
" results.append({\n",
" \"stat\": col,\n",
" \"mean_legendary\": float(np.mean(x)),\n",
" \"mean_nonlegendary\": float(np.mean(y)),\n",
" \"t_stat\": float(t),\n",
" \"p_value\": float(p)\n",
" })\n",
"\n",
"# Present as a small table and mark significance at alpha=0.05 (uncorrected)\n",
"import pandas as pd\n",
"res_df = pd.DataFrame(results)\n",
"res_df[\"significant_0.05\"] = res_df[\"p_value\"] < 0.05\n",
"print(res_df.to_string(index=False))"
]
},
{
Expand All @@ -337,7 +402,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 18,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -453,7 +518,7 @@
"4 624.0 262.0 1.9250 65500.0 "
]
},
"execution_count": 5,
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -483,10 +548,58 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": []
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Close mean: 246951.98213501245\n",
"Far mean: 180678.44105790975\n",
"t-statistic: 3.400096139118546\n",
"p-value: 3.0064957768592614e-301\n",
"Reject H0 → Houses close to school/hospital have significantly different prices.\n"
]
}
],
"source": [
"# Coordinates\n",
"school = (-118, 34)\n",
"hospital = (-122, 37)\n",
"\n",
"# Function to calculate Euclidean distance\n",
"def euclidean_distance(x1, y1, x2, y2):\n",
" return np.sqrt((x1 - x2)**2 + (y1 - y2)**2)\n",
"\n",
"# Calculate distance to school and hospital\n",
"df[\"dist_school\"] = np.sqrt((df[\"longitude\"] - school[0])**2 + (df[\"latitude\"] - school[1])**2)\n",
"df[\"dist_hospital\"] = np.sqrt((df[\"longitude\"] - hospital[0])**2 + (df[\"latitude\"] - hospital[1])**2)\n",
"\n",
"# Close if distance < 0.5 to either\n",
"df[\"close\"] = ((df[\"dist_school\"] < 0.5) | (df[\"dist_hospital\"] < 0.5))\n",
"\n",
"# Split groups\n",
"close_values = df[df[\"close\"] == True][\"median_house_value\"]\n",
"far_values = df[df[\"close\"] == False][\"median_house_value\"]\n",
"\n",
"print(\"Close mean:\", close_values.mean())\n",
"print(\"Far mean:\", far_values.mean())\n",
"\n",
"# --- Hypothesis test ---\n",
"# Two-sample independent t-test\n",
"t_st, p_value = st.ttest_ind(close_values, far_values, equal_var=False)\n",
"\n",
"print(\"t-statistic:\", t_stat)\n",
"print(\"p-value:\", p_value)\n",
"\n",
"# Decision\n",
"alpha = 0.05\n",
"if p_value < alpha:\n",
" print(\"Reject H0 → Houses close to school/hospital have significantly different prices.\")\n",
"else:\n",
" print(\"Fail to reject H0 → No significant difference in prices.\")"
]
},
{
"cell_type": "code",
Expand All @@ -498,7 +611,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "base",
"language": "python",
"name": "python3"
},
Expand All @@ -512,7 +625,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
"version": "3.12.7"
}
},
"nbformat": 4,
Expand Down