From c9bda468520478d915a65e2ff6b29df1567baf30 Mon Sep 17 00:00:00 2001
From: Jhenifher  De Almeida <jhenifher.almeida23@gmail.com>
Date: Sat, 6 Sep 2025 12:22:18 +0200
Subject: [PATCH] Add files via upload

---
 lab-hypothesis-testing.ipynb | 234 ++++++++++++++++++++++++++++++++---
 1 file changed, 219 insertions(+), 15 deletions(-)

diff --git a/lab-hypothesis-testing.ipynb b/lab-hypothesis-testing.ipynb
index 0cc26d5..b609fe2 100644
--- a/lab-hypothesis-testing.ipynb
+++ b/lab-hypothesis-testing.ipynb
@@ -38,7 +38,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 24,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -51,7 +51,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 25,
    "metadata": {},
    "outputs": [
     {
@@ -278,7 +278,7 @@
        "[800 rows x 11 columns]"
       ]
      },
-     "execution_count": 3,
+     "execution_count": 25,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -297,11 +297,67 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 26,
    "metadata": {},
    "outputs": [],
    "source": [
-    "#code here"
+    "import pandas as pd\n",
+    "from scipy import stats \n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dragon_hp = df[df['Type 1'] == 'Dragon']['HP']\n",
+    "grass_hp = df[df['Type 2'] == 'Grass'] ['HP']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "t-statistic: 3.840727789973186\n",
+      "one-sided p-value: 0.00016096360035183744\n"
+     ]
+    }
+   ],
+   "source": [
+    "t_stat, p_value_two_sided = stats.ttest_ind(dragon_hp, grass_hp, equal_var=False)\n",
+    "\n",
+    "p_value_one_sided = p_value_two_sided / 2 if t_stat > 0 else 1 - (p_value_two_sided / 2)\n",
+    "\n",
+    "print(\"t-statistic:\", t_stat)\n",
+    "print(\"one-sided p-value:\", p_value_one_sided)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Reject H0: Dragon-type Pokémon have significantly higher HP than Grass-type Pokémon.\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Conclusion at 5% significance\n",
+    "alpha = 0.05\n",
+    "if p_value_one_sided < alpha:\n",
+    "    print(\"Reject H0: Dragon-type Pokémon have significantly higher HP than Grass-type Pokémon.\")\n",
+    "else:\n",
+    "    print(\"Fail to reject H0: No significant evidence that Dragon-type Pokémon have higher HP.\")"
    ]
   },
   {
@@ -313,11 +369,100 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 33,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['Name', 'Type 1', 'Type 2', 'HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Generation', 'Legendary']\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(df.columns.tolist())\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 48,
    "metadata": {},
    "outputs": [],
    "source": [
-    "#code here"
+    "# Rename columns so they don't have dots\n",
+    "df = df.rename(columns={\n",
+    "    'Sp.Atk': 'Sp_Atk',\n",
+    "    'Sp.Def': 'Sp_Def'\n",
+    "})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "PatsyError",
+     "evalue": "Error evaluating factor: NameError: name 'Defense' is not defined\n    HP + Attack + Defense + Sp_Atk + Sp_Def + Speed ~ Legendary\n                  ^^^^^^^",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+      "File \u001b[0;32m/opt/anaconda3/lib/python3.12/site-packages/patsy/compat.py:36\u001b[0m, in \u001b[0;36mcall_and_wrap_exc\u001b[0;34m(msg, origin, f, *args, **kwargs)\u001b[0m\n\u001b[1;32m     35\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 36\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m f(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m     37\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n",
+      "File \u001b[0;32m/opt/anaconda3/lib/python3.12/site-packages/patsy/eval.py:169\u001b[0m, in \u001b[0;36mEvalEnvironment.eval\u001b[0;34m(self, expr, source_name, inner_namespace)\u001b[0m\n\u001b[1;32m    168\u001b[0m code \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mcompile\u001b[39m(expr, source_name, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124meval\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mflags, \u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[0;32m--> 169\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28meval\u001b[39m(code, {}, VarLookupDict([inner_namespace]\n\u001b[1;32m    170\u001b[0m                                     \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_namespaces))\n",
+      "File \u001b[0;32m<string>:1\u001b[0m\n",
+      "\u001b[0;31mNameError\u001b[0m: name 'Defense' is not defined",
+      "\nThe above exception was the direct cause of the following exception:\n",
+      "\u001b[0;31mPatsyError\u001b[0m                                Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[53], line 3\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mstatsmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmultivariate\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmanova\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m MANOVA\n\u001b[0;32m----> 3\u001b[0m maov \u001b[38;5;241m=\u001b[39m MANOVA\u001b[38;5;241m.\u001b[39mfrom_formula(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mHP + Attack + Defense + Sp_Atk + Sp_Def + Speed ~ Legendary\u001b[39m\u001b[38;5;124m'\u001b[39m, data\u001b[38;5;241m=\u001b[39mdf)\n\u001b[1;32m      4\u001b[0m \u001b[38;5;28mprint\u001b[39m(maov\u001b[38;5;241m.\u001b[39mmv_test())\n",
+      "File \u001b[0;32m/opt/anaconda3/lib/python3.12/site-packages/statsmodels/base/model.py:203\u001b[0m, in \u001b[0;36mModel.from_formula\u001b[0;34m(cls, formula, data, subset, drop_cols, *args, **kwargs)\u001b[0m\n\u001b[1;32m    200\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m missing \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnone\u001b[39m\u001b[38;5;124m'\u001b[39m:  \u001b[38;5;66;03m# with patsy it's drop or raise. let's raise.\u001b[39;00m\n\u001b[1;32m    201\u001b[0m     missing \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mraise\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m--> 203\u001b[0m tmp \u001b[38;5;241m=\u001b[39m handle_formula_data(data, \u001b[38;5;28;01mNone\u001b[39;00m, formula, depth\u001b[38;5;241m=\u001b[39meval_env,\n\u001b[1;32m    204\u001b[0m                           missing\u001b[38;5;241m=\u001b[39mmissing)\n\u001b[1;32m    205\u001b[0m ((endog, exog), missing_idx, design_info) \u001b[38;5;241m=\u001b[39m tmp\n\u001b[1;32m    206\u001b[0m max_endog \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_formula_max_endog\n",
+      "File \u001b[0;32m/opt/anaconda3/lib/python3.12/site-packages/statsmodels/formula/formulatools.py:63\u001b[0m, in \u001b[0;36mhandle_formula_data\u001b[0;34m(Y, X, formula, depth, missing)\u001b[0m\n\u001b[1;32m     61\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m     62\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m data_util\u001b[38;5;241m.\u001b[39m_is_using_pandas(Y, \u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[0;32m---> 63\u001b[0m         result \u001b[38;5;241m=\u001b[39m dmatrices(formula, Y, depth, return_type\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdataframe\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m     64\u001b[0m                            NA_action\u001b[38;5;241m=\u001b[39mna_action)\n\u001b[1;32m     65\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m     66\u001b[0m         result \u001b[38;5;241m=\u001b[39m dmatrices(formula, Y, depth, return_type\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdataframe\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m     67\u001b[0m                            NA_action\u001b[38;5;241m=\u001b[39mna_action)\n",
+      "File \u001b[0;32m/opt/anaconda3/lib/python3.12/site-packages/patsy/highlevel.py:309\u001b[0m, in \u001b[0;36mdmatrices\u001b[0;34m(formula_like, data, eval_env, NA_action, return_type)\u001b[0m\n\u001b[1;32m    299\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Construct two design matrices given a formula_like and data.\u001b[39;00m\n\u001b[1;32m    300\u001b[0m \n\u001b[1;32m    301\u001b[0m \u001b[38;5;124;03mThis function is identical to :func:`dmatrix`, except that it requires\u001b[39;00m\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    306\u001b[0m \u001b[38;5;124;03mSee :func:`dmatrix` for details.\u001b[39;00m\n\u001b[1;32m    307\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m    308\u001b[0m eval_env \u001b[38;5;241m=\u001b[39m EvalEnvironment\u001b[38;5;241m.\u001b[39mcapture(eval_env, reference\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[0;32m--> 309\u001b[0m (lhs, rhs) \u001b[38;5;241m=\u001b[39m _do_highlevel_design(formula_like, data, eval_env,\n\u001b[1;32m    310\u001b[0m                                   NA_action, return_type)\n\u001b[1;32m    311\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m lhs\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m1\u001b[39m] \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m    312\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m PatsyError(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel is missing required outcome variables\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
+      "File \u001b[0;32m/opt/anaconda3/lib/python3.12/site-packages/patsy/highlevel.py:164\u001b[0m, in \u001b[0;36m_do_highlevel_design\u001b[0;34m(formula_like, data, eval_env, NA_action, return_type)\u001b[0m\n\u001b[1;32m    162\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdata_iter_maker\u001b[39m():\n\u001b[1;32m    163\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28miter\u001b[39m([data])\n\u001b[0;32m--> 164\u001b[0m design_infos \u001b[38;5;241m=\u001b[39m _try_incr_builders(formula_like, data_iter_maker, eval_env,\n\u001b[1;32m    165\u001b[0m                                   NA_action)\n\u001b[1;32m    166\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m design_infos \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    167\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m build_design_matrices(design_infos, data,\n\u001b[1;32m    168\u001b[0m                                  NA_action\u001b[38;5;241m=\u001b[39mNA_action,\n\u001b[1;32m    169\u001b[0m                                  return_type\u001b[38;5;241m=\u001b[39mreturn_type)\n",
+      "File \u001b[0;32m/opt/anaconda3/lib/python3.12/site-packages/patsy/highlevel.py:66\u001b[0m, in \u001b[0;36m_try_incr_builders\u001b[0;34m(formula_like, data_iter_maker, eval_env, NA_action)\u001b[0m\n\u001b[1;32m     64\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(formula_like, ModelDesc):\n\u001b[1;32m     65\u001b[0m     \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(eval_env, EvalEnvironment)\n\u001b[0;32m---> 66\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m design_matrix_builders([formula_like\u001b[38;5;241m.\u001b[39mlhs_termlist,\n\u001b[1;32m     67\u001b[0m                                    formula_like\u001b[38;5;241m.\u001b[39mrhs_termlist],\n\u001b[1;32m     68\u001b[0m                                   data_iter_maker,\n\u001b[1;32m     69\u001b[0m                                   eval_env,\n\u001b[1;32m     70\u001b[0m                                   NA_action)\n\u001b[1;32m     71\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m     72\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n",
+      "File \u001b[0;32m/opt/anaconda3/lib/python3.12/site-packages/patsy/build.py:693\u001b[0m, in \u001b[0;36mdesign_matrix_builders\u001b[0;34m(termlists, data_iter_maker, eval_env, NA_action)\u001b[0m\n\u001b[1;32m    689\u001b[0m factor_states \u001b[38;5;241m=\u001b[39m _factors_memorize(all_factors, data_iter_maker, eval_env)\n\u001b[1;32m    690\u001b[0m \u001b[38;5;66;03m# Now all the factors have working eval methods, so we can evaluate them\u001b[39;00m\n\u001b[1;32m    691\u001b[0m \u001b[38;5;66;03m# on some data to find out what type of data they return.\u001b[39;00m\n\u001b[1;32m    692\u001b[0m (num_column_counts,\n\u001b[0;32m--> 693\u001b[0m  cat_levels_contrasts) \u001b[38;5;241m=\u001b[39m _examine_factor_types(all_factors,\n\u001b[1;32m    694\u001b[0m                                                factor_states,\n\u001b[1;32m    695\u001b[0m                                                data_iter_maker,\n\u001b[1;32m    696\u001b[0m                                                NA_action)\n\u001b[1;32m    697\u001b[0m \u001b[38;5;66;03m# Now we need the factor infos, which encapsulate the knowledge of\u001b[39;00m\n\u001b[1;32m    698\u001b[0m \u001b[38;5;66;03m# how to turn any given factor into a chunk of data:\u001b[39;00m\n\u001b[1;32m    699\u001b[0m factor_infos \u001b[38;5;241m=\u001b[39m {}\n",
+      "File \u001b[0;32m/opt/anaconda3/lib/python3.12/site-packages/patsy/build.py:443\u001b[0m, in \u001b[0;36m_examine_factor_types\u001b[0;34m(factors, factor_states, data_iter_maker, NA_action)\u001b[0m\n\u001b[1;32m    441\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m data \u001b[38;5;129;01min\u001b[39;00m data_iter_maker():\n\u001b[1;32m    442\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m factor \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mlist\u001b[39m(examine_needed):\n\u001b[0;32m--> 443\u001b[0m         value \u001b[38;5;241m=\u001b[39m factor\u001b[38;5;241m.\u001b[39meval(factor_states[factor], data)\n\u001b[1;32m    444\u001b[0m         \u001b[38;5;28;01mif\u001b[39;00m factor \u001b[38;5;129;01min\u001b[39;00m cat_sniffers \u001b[38;5;129;01mor\u001b[39;00m guess_categorical(value):\n\u001b[1;32m    445\u001b[0m             \u001b[38;5;28;01mif\u001b[39;00m factor \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m cat_sniffers:\n",
+      "File \u001b[0;32m/opt/anaconda3/lib/python3.12/site-packages/patsy/eval.py:568\u001b[0m, in \u001b[0;36mEvalFactor.eval\u001b[0;34m(self, memorize_state, data)\u001b[0m\n\u001b[1;32m    567\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21meval\u001b[39m(\u001b[38;5;28mself\u001b[39m, memorize_state, data):\n\u001b[0;32m--> 568\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_eval(memorize_state[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124meval_code\u001b[39m\u001b[38;5;124m\"\u001b[39m],\n\u001b[1;32m    569\u001b[0m                       memorize_state,\n\u001b[1;32m    570\u001b[0m                       data)\n",
+      "File \u001b[0;32m/opt/anaconda3/lib/python3.12/site-packages/patsy/eval.py:551\u001b[0m, in \u001b[0;36mEvalFactor._eval\u001b[0;34m(self, code, memorize_state, data)\u001b[0m\n\u001b[1;32m    549\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_eval\u001b[39m(\u001b[38;5;28mself\u001b[39m, code, memorize_state, data):\n\u001b[1;32m    550\u001b[0m     inner_namespace \u001b[38;5;241m=\u001b[39m VarLookupDict([data, memorize_state[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtransforms\u001b[39m\u001b[38;5;124m\"\u001b[39m]])\n\u001b[0;32m--> 551\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m call_and_wrap_exc(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mError evaluating factor\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m    552\u001b[0m                              \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m    553\u001b[0m                              memorize_state[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124meval_env\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39meval,\n\u001b[1;32m    554\u001b[0m                              code,\n\u001b[1;32m    555\u001b[0m                              inner_namespace\u001b[38;5;241m=\u001b[39minner_namespace)\n",
+      "File \u001b[0;32m/opt/anaconda3/lib/python3.12/site-packages/patsy/compat.py:43\u001b[0m, in \u001b[0;36mcall_and_wrap_exc\u001b[0;34m(msg, origin, f, *args, **kwargs)\u001b[0m\n\u001b[1;32m     39\u001b[0m     new_exc \u001b[38;5;241m=\u001b[39m PatsyError(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m     40\u001b[0m                          \u001b[38;5;241m%\u001b[39m (msg, e\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, e),\n\u001b[1;32m     41\u001b[0m                          origin)\n\u001b[1;32m     42\u001b[0m     \u001b[38;5;66;03m# Use 'exec' to hide this syntax from the Python 2 parser:\u001b[39;00m\n\u001b[0;32m---> 43\u001b[0m     exec(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mraise new_exc from e\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m     44\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m     45\u001b[0m     \u001b[38;5;66;03m# In python 2, we just let the original exception escape -- better\u001b[39;00m\n\u001b[1;32m     46\u001b[0m     \u001b[38;5;66;03m# than destroying the traceback. But if it's a PatsyError, we can\u001b[39;00m\n\u001b[1;32m     47\u001b[0m     \u001b[38;5;66;03m# at least set the origin properly.\u001b[39;00m\n\u001b[1;32m     48\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(e, PatsyError):\n",
+      "File \u001b[0;32m<string>:1\u001b[0m\n",
+      "\u001b[0;31mPatsyError\u001b[0m: Error evaluating factor: NameError: name 'Defense' is not defined\n    HP + Attack + Defense + Sp_Atk + Sp_Def + Speed ~ Legendary\n                  ^^^^^^^"
+     ]
+    }
+   ],
+   "source": [
+    "from statsmodels.multivariate.manova import MANOVA\n",
+    "\n",
+    "maov = MANOVA.from_formula('HP + Attack + Defense + Sp_Atk + Sp_Def + Speed ~ Legendary', data=df)\n",
+    "print(maov.mv_test())\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "SyntaxError",
+     "evalue": "unterminated string literal (detected at line 8) (256359532.py, line 8)",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;36m  Cell \u001b[0;32mIn[50], line 8\u001b[0;36m\u001b[0m\n\u001b[0;31m    maov = MANOVA.from_formula( HP + Attack + Defense + 'Sp.Atk' + 'Sp.Def' + Speed ~ Legendary', data=df)\u001b[0m\n\u001b[0m                                                                                               ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m unterminated string literal (detected at line 8)\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "from statsmodels.multivariate.manova import MANOVA\n",
+    "\n",
+    "# Encode Legendary as categorical\n",
+    "df['Legendary'] = df['Legendary'].astype(str)\n",
+    "\n",
+    "# Run MANOVA\n",
+    "maov = MANOVA.from_formula( HP + Attack + Defense + 'Sp.Atk' + 'Sp.Def' + Speed ~ Legendary', data=df)\n",
+    "print(maov.mv_test())\n"
    ]
   },
   {
@@ -337,7 +482,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 58,
    "metadata": {},
    "outputs": [
     {
@@ -453,7 +598,7 @@
        "4       624.0       262.0         1.9250             65500.0  "
       ]
      },
-     "execution_count": 5,
+     "execution_count": 58,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -463,6 +608,13 @@
     "df.head()"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -483,22 +635,74 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 62,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from scipy import stats\n",
+    "\n",
+    "\n",
+    "school = np.array([-118, 34])\n",
+    "hospital = np.array([-122, 37])\n",
+    "\n",
+    "def euclidean_distance(x, y, point):\n",
+    "    return np.sqrt((x - point[0])**2 + (y - point[1])**2)\n",
+    "\n",
+    "# Compute distances\n",
+    "df['dist_school'] = df.apply(lambda row: euclidean_distance(row['longitude'], row['latitude'], school), axis=1)\n",
+    "df['dist_hospital'] = df.apply(lambda row: euclidean_distance(row['longitude'], row['latitude'], hospital), axis=1)\n",
+    "\n",
+    "\n",
+    "df['close'] = ((df['dist_school'] < 0.5) | (df['dist_hospital'] < 0.5)).astype(int)\n"
+   ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 67,
    "metadata": {},
    "outputs": [],
-   "source": []
+   "source": [
+    "close_prices = df[df['close'] == 1]['median_house_value']\n",
+    "far_prices   = df[df['close'] == 0]['median_house_value']\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 68,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "t-statistic: 37.992330214201516\n",
+      "one-sided p-value: 1.5032478884296307e-301\n",
+      "✅ Reject H0: Houses close to school/hospital are significantly more expensive.\n"
+     ]
+    }
+   ],
+   "source": [
+    "t_stat, p_value_two_sided = stats.ttest_ind(close_prices, far_prices, equal_var=False)\n",
+    "\n",
+    "# One-sided test: close > far\n",
+    "p_value_one_sided = p_value_two_sided / 2 if t_stat > 0 else 1 - (p_value_two_sided / 2)\n",
+    "\n",
+    "print(\"t-statistic:\", t_stat)\n",
+    "print(\"one-sided p-value:\", p_value_one_sided)\n",
+    "\n",
+    "alpha = 0.05\n",
+    "if p_value_one_sided < alpha:\n",
+    "    print(\"✅ Reject H0: Houses close to school/hospital are significantly more expensive.\")\n",
+    "else:\n",
+    "    print(\"❌ Fail to reject H0: No significant evidence that close houses are more expensive.\")\n"
+   ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "base",
    "language": "python",
    "name": "python3"
   },
@@ -512,7 +716,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.9"
+   "version": "3.12.7"
   }
  },
  "nbformat": 4,