data-bootcamp-v4 · helinozge · Oct 3, 2025
diff --git a/lab-hypothesis-testing.ipynb b/lab-hypothesis-testing.ipynb
@@ -38,7 +38,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -51,7 +51,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [
     {
@@ -278,7 +278,7 @@
        "[800 rows x 11 columns]"
       ]
      },
-     "execution_count": 3,
+     "execution_count": 15,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -297,11 +297,40 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 16,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Mean(Dragon HP): 83.31  |  Mean(Other HP): 68.67\n",
+      "Welch t-statistic: 3.400\n",
+      "One-sided p-value (Dragon > Others): 0.0008676\n",
+      "Conclusion: Reject H0: Dragons have higher average HP.\n"
+     ]
+    }
+   ],
    "source": [
-    "#code here"
+    "# Welch's t-test: is the mean HP of Dragon-type rows higher than that of all other rows?\n",
+    "is_dragon = df[\"Type 1\"] == \"Dragon\"\n",
+    "dragons = df.loc[is_dragon, \"HP\"].dropna()\n",
+    "others = df.loc[~is_dragon, \"HP\"].dropna()\n",
+    "\n",
+    "t_stat, p_two_sided = st.ttest_ind(dragons, others, equal_var=False, nan_policy=\"omit\")\n",
+    "\n",
+    "# ttest_ind reports a two-sided p-value here. For the 'greater' alternative, halve it when\n",
+    "# the observed difference points in the direction of H1, otherwise take the other tail.\n",
+    "mean_diff = dragons.mean() - others.mean()\n",
+    "p_one_sided = p_two_sided / 2 if mean_diff > 0 else 1 - (p_two_sided / 2)\n",
+    "\n",
+    "alpha = 0.05\n",
+    "verdict = (\"Reject H0: Dragons have higher average HP.\" if p_one_sided < alpha\n",
+    "           else \"Fail to reject H0: Insufficient evidence that Dragons have higher HP.\")\n",
+    "print(f\"Mean(Dragon HP): {dragons.mean():.2f}  |  Mean(Other HP): {others.mean():.2f}\")\n",
+    "print(f\"Welch t-statistic: {t_stat:.3f}\")\n",
+    "print(f\"One-sided p-value (Dragon > Others): {p_one_sided:.4g}\")\n",
+    "print(\"Conclusion:\", verdict)\n"
    ]
   },
   {
@@ -313,11 +342,47 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 17,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "   stat  mean_legendary  mean_nonlegendary    t_stat      p_value  significant_0.05\n",
+      "     HP       92.738462          67.182313  8.981370 1.002691e-13              True\n",
+      " Attack      116.676923          75.669388 10.438134 2.520372e-16              True\n",
+      "Defense       99.661538          71.559184  7.637078 4.826998e-11              True\n",
+      "Sp. Atk      122.184615          68.454422 13.417450 1.551461e-21              True\n",
+      "Sp. Def      105.938462          68.892517 10.015697 2.294933e-15              True\n",
+      "  Speed      100.184615          65.455782 11.475044 1.049016e-18              True\n"
+     ]
+    }
+   ],
    "source": [
-    "#code here"
+    "# Welch's t-test on each base stat: Legendary rows versus non-Legendary rows\n",
+    "stats_cols = [\"HP\", \"Attack\", \"Defense\", \"Sp. Atk\", \"Sp. Def\", \"Speed\"]\n",
+    "leg = df[df[\"Legendary\"].eq(True)]\n",
+    "non = df[df[\"Legendary\"].eq(False)]\n",
+    "\n",
+    "results = []\n",
+    "for col in stats_cols:\n",
+    "    x = leg[col].dropna()\n",
+    "    y = non[col].dropna()\n",
+    "    t, p = st.ttest_ind(x, y, equal_var=False, nan_policy=\"omit\")\n",
+    "    results.append(dict(\n",
+    "        stat=col,\n",
+    "        mean_legendary=float(x.mean()),\n",
+    "        mean_nonlegendary=float(y.mean()),\n",
+    "        t_stat=float(t),\n",
+    "        p_value=float(p),\n",
+    "    ))\n",
+    "\n",
+    "# One row per stat; the flag uses alpha=0.05 with no multiple-comparison correction\n",
+    "import pandas as pd\n",
+    "res_df = pd.DataFrame(results)\n",
+    "res_df[\"significant_0.05\"] = res_df[\"p_value\"].lt(0.05)\n",
+    "print(res_df.to_string(index=False))"
    ]
   },
   {
@@ -337,7 +402,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [
     {
@@ -453,7 +518,7 @@
        "4       624.0       262.0         1.9250             65500.0  "
       ]
      },
-     "execution_count": 5,
+     "execution_count": 18,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -483,10 +548,58 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 19,
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Close mean: 246951.98213501245\n",
+      "Far mean: 180678.44105790975\n",
+      "t-statistic: 3.400096139118546\n",
+      "p-value: 3.0064957768592614e-301\n",
+      "Reject H0 → Houses close to school/hospital have significantly different prices.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Reference points as (longitude, latitude)\n",
+    "school = (-118, 34)\n",
+    "hospital = (-122, 37)\n",
+    "\n",
+    "# Straight-line distance in raw coordinate units (presumably degrees, not kilometres)\n",
+    "def euclidean_distance(x1, y1, x2, y2):\n",
+    "    return np.sqrt((x1 - x2)**2 + (y1 - y2)**2)\n",
+    "\n",
+    "# Distance of every row to each reference point, computed with the helper above\n",
+    "df[\"dist_school\"] = euclidean_distance(df[\"longitude\"], df[\"latitude\"], *school)\n",
+    "df[\"dist_hospital\"] = euclidean_distance(df[\"longitude\"], df[\"latitude\"], *hospital)\n",
+    "\n",
+    "# A row is flagged close when it lies within 0.5 units of either reference point\n",
+    "df[\"close\"] = ((df[\"dist_school\"] < 0.5) | (df[\"dist_hospital\"] < 0.5))\n",
+    "\n",
+    "# Split the house values by the boolean flag\n",
+    "close_values = df.loc[df[\"close\"], \"median_house_value\"]\n",
+    "far_values   = df.loc[~df[\"close\"], \"median_house_value\"]\n",
+    "\n",
+    "print(\"Close mean:\", close_values.mean())\n",
+    "print(\"Far mean:\", far_values.mean())\n",
+    "\n",
+    "# Welch's two-sample t-test (equal_var=False); H0: both groups share the same mean value\n",
+    "# The statistic gets its own name so the t_stat left over from an earlier cell is not printed\n",
+    "t_house, p_value = st.ttest_ind(close_values, far_values, equal_var=False)\n",
+    "\n",
+    "print(\"t-statistic:\", t_house)\n",
+    "print(\"p-value:\", p_value)\n",
+    "\n",
+    "# Decision at the 5% significance level (two-sided)\n",
+    "alpha = 0.05\n",
+    "if p_value < alpha:\n",
+    "    print(\"Reject H0 → Houses close to school/hospital have significantly different prices.\")\n",
+    "else:\n",
+    "    print(\"Fail to reject H0 → No significant difference in prices.\")"
+   ]
   },
   {
    "cell_type": "code",
@@ -498,7 +611,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "base",
    "language": "python",
    "name": "python3"
   },
@@ -512,7 +625,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.9"
+   "version": "3.12.7"
   }
  },
  "nbformat": 4,