From c9bda468520478d915a65e2ff6b29df1567baf30 Mon Sep 17 00:00:00 2001 From: Jhenifher De Almeida Date: Sat, 6 Sep 2025 12:22:18 +0200 Subject: [PATCH] Add files via upload --- lab-hypothesis-testing.ipynb | 234 ++++++++++++++++++++++++++++++++--- 1 file changed, 219 insertions(+), 15 deletions(-) diff --git a/lab-hypothesis-testing.ipynb b/lab-hypothesis-testing.ipynb index 0cc26d5..b609fe2 100644 --- a/lab-hypothesis-testing.ipynb +++ b/lab-hypothesis-testing.ipynb @@ -38,7 +38,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ @@ -51,7 +51,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -278,7 +278,7 @@ "[800 rows x 11 columns]" ] }, - "execution_count": 3, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -297,11 +297,67 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 26, "metadata": {}, "outputs": [], "source": [ - "#code here" + "import pandas as pd\n", + "from scipy import stats \n" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "dragon_hp = df[df['Type 1'] == 'Dragon']['HP']\n", + "grass_hp = df[df['Type 2'] == 'Grass'] ['HP']" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "t-statistic: 3.840727789973186\n", + "one-sided p-value: 0.00016096360035183744\n" + ] + } + ], + "source": [ + "t_stat, p_value_two_sided = stats.ttest_ind(dragon_hp, grass_hp, equal_var=False)\n", + "\n", + "p_value_one_sided = p_value_two_sided / 2 if t_stat > 0 else 1 - (p_value_two_sided / 2)\n", + "\n", + "print(\"t-statistic:\", t_stat)\n", + "print(\"one-sided p-value:\", p_value_one_sided)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Reject H0: Dragon-type Pokémon have significantly higher HP than Grass-type Pokémon.\n" + ] + } + ], + "source": [ + "# Conclusion at 5% significance\n", + "alpha = 0.05\n", + "if p_value_one_sided < alpha:\n", + " print(\"Reject H0: Dragon-type Pokémon have significantly higher HP than Grass-type Pokémon.\")\n", + "else:\n", + " print(\"Fail to reject H0: No significant evidence that Dragon-type Pokémon have higher HP.\")" ] }, { @@ -313,11 +369,100 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['Name', 'Type 1', 'Type 2', 'HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Generation', 'Legendary']\n" + ] + } + ], + "source": [ + "print(df.columns.tolist())\n" + ] + }, + { + "cell_type": "code", + "execution_count": 48, "metadata": {}, "outputs": [], "source": [ - "#code here" + "# Rename columns so they don't have dots\n", + "df = df.rename(columns={\n", + " 'Sp.Atk': 'Sp_Atk',\n", + " 'Sp.Def': 'Sp_Def'\n", + "})" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "metadata": {}, + "outputs": [ + { + "ename": "PatsyError", + "evalue": "Error evaluating factor: NameError: name 'Defense' is not defined\n HP + Attack + Defense + Sp_Atk + Sp_Def + Speed ~ Legendary\n ^^^^^^^", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "File \u001b[0;32m/opt/anaconda3/lib/python3.12/site-packages/patsy/compat.py:36\u001b[0m, in \u001b[0;36mcall_and_wrap_exc\u001b[0;34m(msg, origin, f, *args, **kwargs)\u001b[0m\n\u001b[1;32m 35\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 36\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m f(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m 37\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n", + "File \u001b[0;32m/opt/anaconda3/lib/python3.12/site-packages/patsy/eval.py:169\u001b[0m, in \u001b[0;36mEvalEnvironment.eval\u001b[0;34m(self, expr, source_name, inner_namespace)\u001b[0m\n\u001b[1;32m 168\u001b[0m code \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mcompile\u001b[39m(expr, source_name, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124meval\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mflags, \u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[0;32m--> 169\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28meval\u001b[39m(code, {}, VarLookupDict([inner_namespace]\n\u001b[1;32m 170\u001b[0m \u001b[38;5;241m+\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_namespaces))\n", + "File \u001b[0;32m:1\u001b[0m\n", + "\u001b[0;31mNameError\u001b[0m: name 'Defense' is not defined", + "\nThe above exception was the direct cause of the following exception:\n", + "\u001b[0;31mPatsyError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[53], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mstatsmodels\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmultivariate\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmanova\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m MANOVA\n\u001b[0;32m----> 3\u001b[0m maov \u001b[38;5;241m=\u001b[39m MANOVA\u001b[38;5;241m.\u001b[39mfrom_formula(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mHP + Attack + Defense + Sp_Atk + Sp_Def + Speed ~ Legendary\u001b[39m\u001b[38;5;124m'\u001b[39m, data\u001b[38;5;241m=\u001b[39mdf)\n\u001b[1;32m 4\u001b[0m \u001b[38;5;28mprint\u001b[39m(maov\u001b[38;5;241m.\u001b[39mmv_test())\n", + "File \u001b[0;32m/opt/anaconda3/lib/python3.12/site-packages/statsmodels/base/model.py:203\u001b[0m, in \u001b[0;36mModel.from_formula\u001b[0;34m(cls, formula, data, subset, drop_cols, *args, **kwargs)\u001b[0m\n\u001b[1;32m 200\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m missing \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnone\u001b[39m\u001b[38;5;124m'\u001b[39m: \u001b[38;5;66;03m# with patsy it's drop or raise. let's raise.\u001b[39;00m\n\u001b[1;32m 201\u001b[0m missing \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mraise\u001b[39m\u001b[38;5;124m'\u001b[39m\n\u001b[0;32m--> 203\u001b[0m tmp \u001b[38;5;241m=\u001b[39m handle_formula_data(data, \u001b[38;5;28;01mNone\u001b[39;00m, formula, depth\u001b[38;5;241m=\u001b[39meval_env,\n\u001b[1;32m 204\u001b[0m missing\u001b[38;5;241m=\u001b[39mmissing)\n\u001b[1;32m 205\u001b[0m ((endog, exog), missing_idx, design_info) \u001b[38;5;241m=\u001b[39m tmp\n\u001b[1;32m 206\u001b[0m max_endog \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_formula_max_endog\n", + "File \u001b[0;32m/opt/anaconda3/lib/python3.12/site-packages/statsmodels/formula/formulatools.py:63\u001b[0m, in \u001b[0;36mhandle_formula_data\u001b[0;34m(Y, X, formula, depth, missing)\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 62\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m data_util\u001b[38;5;241m.\u001b[39m_is_using_pandas(Y, \u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[0;32m---> 63\u001b[0m result \u001b[38;5;241m=\u001b[39m dmatrices(formula, Y, depth, return_type\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdataframe\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 64\u001b[0m NA_action\u001b[38;5;241m=\u001b[39mna_action)\n\u001b[1;32m 65\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 66\u001b[0m result \u001b[38;5;241m=\u001b[39m dmatrices(formula, Y, depth, return_type\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdataframe\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m 67\u001b[0m NA_action\u001b[38;5;241m=\u001b[39mna_action)\n", + "File \u001b[0;32m/opt/anaconda3/lib/python3.12/site-packages/patsy/highlevel.py:309\u001b[0m, in \u001b[0;36mdmatrices\u001b[0;34m(formula_like, data, eval_env, NA_action, return_type)\u001b[0m\n\u001b[1;32m 299\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Construct two design matrices given a formula_like and data.\u001b[39;00m\n\u001b[1;32m 300\u001b[0m \n\u001b[1;32m 301\u001b[0m \u001b[38;5;124;03mThis function is identical to :func:`dmatrix`, except that it requires\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 306\u001b[0m \u001b[38;5;124;03mSee :func:`dmatrix` for details.\u001b[39;00m\n\u001b[1;32m 307\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 308\u001b[0m eval_env \u001b[38;5;241m=\u001b[39m EvalEnvironment\u001b[38;5;241m.\u001b[39mcapture(eval_env, reference\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[0;32m--> 309\u001b[0m (lhs, rhs) \u001b[38;5;241m=\u001b[39m _do_highlevel_design(formula_like, data, eval_env,\n\u001b[1;32m 310\u001b[0m NA_action, return_type)\n\u001b[1;32m 311\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m lhs\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m1\u001b[39m] \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[1;32m 312\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m PatsyError(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel is missing required outcome variables\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m/opt/anaconda3/lib/python3.12/site-packages/patsy/highlevel.py:164\u001b[0m, in \u001b[0;36m_do_highlevel_design\u001b[0;34m(formula_like, data, eval_env, NA_action, return_type)\u001b[0m\n\u001b[1;32m 162\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdata_iter_maker\u001b[39m():\n\u001b[1;32m 163\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28miter\u001b[39m([data])\n\u001b[0;32m--> 164\u001b[0m design_infos \u001b[38;5;241m=\u001b[39m _try_incr_builders(formula_like, data_iter_maker, eval_env,\n\u001b[1;32m 165\u001b[0m NA_action)\n\u001b[1;32m 166\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m design_infos \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 167\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m build_design_matrices(design_infos, data,\n\u001b[1;32m 168\u001b[0m NA_action\u001b[38;5;241m=\u001b[39mNA_action,\n\u001b[1;32m 169\u001b[0m return_type\u001b[38;5;241m=\u001b[39mreturn_type)\n", + "File \u001b[0;32m/opt/anaconda3/lib/python3.12/site-packages/patsy/highlevel.py:66\u001b[0m, in \u001b[0;36m_try_incr_builders\u001b[0;34m(formula_like, data_iter_maker, eval_env, NA_action)\u001b[0m\n\u001b[1;32m 64\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(formula_like, ModelDesc):\n\u001b[1;32m 65\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(eval_env, EvalEnvironment)\n\u001b[0;32m---> 66\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m design_matrix_builders([formula_like\u001b[38;5;241m.\u001b[39mlhs_termlist,\n\u001b[1;32m 67\u001b[0m formula_like\u001b[38;5;241m.\u001b[39mrhs_termlist],\n\u001b[1;32m 68\u001b[0m data_iter_maker,\n\u001b[1;32m 69\u001b[0m eval_env,\n\u001b[1;32m 70\u001b[0m NA_action)\n\u001b[1;32m 71\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 72\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n", + "File \u001b[0;32m/opt/anaconda3/lib/python3.12/site-packages/patsy/build.py:693\u001b[0m, in \u001b[0;36mdesign_matrix_builders\u001b[0;34m(termlists, data_iter_maker, eval_env, NA_action)\u001b[0m\n\u001b[1;32m 689\u001b[0m factor_states \u001b[38;5;241m=\u001b[39m _factors_memorize(all_factors, data_iter_maker, eval_env)\n\u001b[1;32m 690\u001b[0m \u001b[38;5;66;03m# Now all the factors have working eval methods, so we can evaluate them\u001b[39;00m\n\u001b[1;32m 691\u001b[0m \u001b[38;5;66;03m# on some data to find out what type of data they return.\u001b[39;00m\n\u001b[1;32m 692\u001b[0m (num_column_counts,\n\u001b[0;32m--> 693\u001b[0m cat_levels_contrasts) \u001b[38;5;241m=\u001b[39m _examine_factor_types(all_factors,\n\u001b[1;32m 694\u001b[0m factor_states,\n\u001b[1;32m 695\u001b[0m data_iter_maker,\n\u001b[1;32m 696\u001b[0m NA_action)\n\u001b[1;32m 697\u001b[0m \u001b[38;5;66;03m# Now we need the factor infos, which encapsulate the knowledge of\u001b[39;00m\n\u001b[1;32m 698\u001b[0m \u001b[38;5;66;03m# how to turn any given factor into a chunk of data:\u001b[39;00m\n\u001b[1;32m 699\u001b[0m factor_infos \u001b[38;5;241m=\u001b[39m {}\n", + "File \u001b[0;32m/opt/anaconda3/lib/python3.12/site-packages/patsy/build.py:443\u001b[0m, in \u001b[0;36m_examine_factor_types\u001b[0;34m(factors, factor_states, data_iter_maker, NA_action)\u001b[0m\n\u001b[1;32m 441\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m data \u001b[38;5;129;01min\u001b[39;00m data_iter_maker():\n\u001b[1;32m 442\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m factor \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mlist\u001b[39m(examine_needed):\n\u001b[0;32m--> 443\u001b[0m value \u001b[38;5;241m=\u001b[39m factor\u001b[38;5;241m.\u001b[39meval(factor_states[factor], data)\n\u001b[1;32m 444\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m factor \u001b[38;5;129;01min\u001b[39;00m cat_sniffers \u001b[38;5;129;01mor\u001b[39;00m guess_categorical(value):\n\u001b[1;32m 445\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m factor \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m cat_sniffers:\n", + "File \u001b[0;32m/opt/anaconda3/lib/python3.12/site-packages/patsy/eval.py:568\u001b[0m, in \u001b[0;36mEvalFactor.eval\u001b[0;34m(self, memorize_state, data)\u001b[0m\n\u001b[1;32m 567\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21meval\u001b[39m(\u001b[38;5;28mself\u001b[39m, memorize_state, data):\n\u001b[0;32m--> 568\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_eval(memorize_state[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124meval_code\u001b[39m\u001b[38;5;124m\"\u001b[39m],\n\u001b[1;32m 569\u001b[0m memorize_state,\n\u001b[1;32m 570\u001b[0m data)\n", + "File \u001b[0;32m/opt/anaconda3/lib/python3.12/site-packages/patsy/eval.py:551\u001b[0m, in \u001b[0;36mEvalFactor._eval\u001b[0;34m(self, code, memorize_state, data)\u001b[0m\n\u001b[1;32m 549\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_eval\u001b[39m(\u001b[38;5;28mself\u001b[39m, code, memorize_state, data):\n\u001b[1;32m 550\u001b[0m inner_namespace \u001b[38;5;241m=\u001b[39m VarLookupDict([data, memorize_state[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtransforms\u001b[39m\u001b[38;5;124m\"\u001b[39m]])\n\u001b[0;32m--> 551\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m call_and_wrap_exc(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mError evaluating factor\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 552\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 553\u001b[0m memorize_state[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124meval_env\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39meval,\n\u001b[1;32m 554\u001b[0m code,\n\u001b[1;32m 555\u001b[0m inner_namespace\u001b[38;5;241m=\u001b[39minner_namespace)\n", + "File \u001b[0;32m/opt/anaconda3/lib/python3.12/site-packages/patsy/compat.py:43\u001b[0m, in \u001b[0;36mcall_and_wrap_exc\u001b[0;34m(msg, origin, f, *args, **kwargs)\u001b[0m\n\u001b[1;32m 39\u001b[0m new_exc \u001b[38;5;241m=\u001b[39m PatsyError(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 40\u001b[0m \u001b[38;5;241m%\u001b[39m (msg, e\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, e),\n\u001b[1;32m 41\u001b[0m origin)\n\u001b[1;32m 42\u001b[0m \u001b[38;5;66;03m# Use 'exec' to hide this syntax from the Python 2 parser:\u001b[39;00m\n\u001b[0;32m---> 43\u001b[0m exec(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mraise new_exc from e\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 44\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 45\u001b[0m \u001b[38;5;66;03m# In python 2, we just let the original exception escape -- better\u001b[39;00m\n\u001b[1;32m 46\u001b[0m \u001b[38;5;66;03m# than destroying the traceback. But if it's a PatsyError, we can\u001b[39;00m\n\u001b[1;32m 47\u001b[0m \u001b[38;5;66;03m# at least set the origin properly.\u001b[39;00m\n\u001b[1;32m 48\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(e, PatsyError):\n", + "File \u001b[0;32m:1\u001b[0m\n", + "\u001b[0;31mPatsyError\u001b[0m: Error evaluating factor: NameError: name 'Defense' is not defined\n HP + Attack + Defense + Sp_Atk + Sp_Def + Speed ~ Legendary\n ^^^^^^^" + ] + } + ], + "source": [ + "from statsmodels.multivariate.manova import MANOVA\n", + "\n", + "maov = MANOVA.from_formula('HP + Attack + Defense + Sp_Atk + Sp_Def + Speed ~ Legendary', data=df)\n", + "print(maov.mv_test())\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "ename": "SyntaxError", + "evalue": "unterminated string literal (detected at line 8) (256359532.py, line 8)", + "output_type": "error", + "traceback": [ + "\u001b[0;36m Cell \u001b[0;32mIn[50], line 8\u001b[0;36m\u001b[0m\n\u001b[0;31m maov = MANOVA.from_formula( HP + Attack + Defense + 'Sp.Atk' + 'Sp.Def' + Speed ~ Legendary', data=df)\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m unterminated string literal (detected at line 8)\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "from statsmodels.multivariate.manova import MANOVA\n", + "\n", + "# Encode Legendary as categorical\n", + "df['Legendary'] = df['Legendary'].astype(str)\n", + "\n", + "# Run MANOVA\n", + "maov = MANOVA.from_formula( HP + Attack + Defense + 'Sp.Atk' + 'Sp.Def' + Speed ~ Legendary', data=df)\n", + "print(maov.mv_test())\n" ] }, { @@ -337,7 +482,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 58, "metadata": {}, "outputs": [ { @@ -453,7 +598,7 @@ "4 624.0 262.0 1.9250 65500.0 " ] }, - "execution_count": 5, + "execution_count": 58, "metadata": {}, "output_type": "execute_result" } @@ -463,6 +608,13 @@ "df.head()" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "markdown", "metadata": {}, @@ -483,22 +635,74 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 62, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from scipy import stats\n", + "\n", + "\n", + "school = np.array([-118, 34])\n", + "hospital = np.array([-122, 37])\n", + "\n", + "def euclidean_distance(x, y, point):\n", + " return np.sqrt((x - point[0])**2 + (y - point[1])**2)\n", + "\n", + "# Compute distances\n", + "df['dist_school'] = df.apply(lambda row: euclidean_distance(row['longitude'], row['latitude'], school), axis=1)\n", + "df['dist_hospital'] = df.apply(lambda row: euclidean_distance(row['longitude'], row['latitude'], hospital), axis=1)\n", + "\n", + "\n", + "df['close'] = ((df['dist_school'] < 0.5) | (df['dist_hospital'] < 0.5)).astype(int)\n" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 67, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "close_prices = df[df['close'] == 1]['median_house_value']\n", + "far_prices = df[df['close'] == 0]['median_house_value']\n" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "t-statistic: 37.992330214201516\n", + "one-sided p-value: 1.5032478884296307e-301\n", + "✅ Reject H0: Houses close to school/hospital are significantly more expensive.\n" + ] + } + ], + "source": [ + "t_stat, p_value_two_sided = stats.ttest_ind(close_prices, far_prices, equal_var=False)\n", + "\n", + "# One-sided test: close > far\n", + "p_value_one_sided = p_value_two_sided / 2 if t_stat > 0 else 1 - (p_value_two_sided / 2)\n", + "\n", + "print(\"t-statistic:\", t_stat)\n", + "print(\"one-sided p-value:\", p_value_one_sided)\n", + "\n", + "alpha = 0.05\n", + "if p_value_one_sided < alpha:\n", + " print(\"✅ Reject H0: Houses close to school/hospital are significantly more expensive.\")\n", + "else:\n", + " print(\"❌ Fail to reject H0: No significant evidence that close houses are more expensive.\")\n" + ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "base", "language": "python", "name": "python3" }, @@ -512,7 +716,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.12.7" } }, "nbformat": 4,