diff --git a/.ipynb_checkpoints/README-checkpoint.md b/.ipynb_checkpoints/README-checkpoint.md new file mode 100644 index 0000000..a561162 --- /dev/null +++ b/.ipynb_checkpoints/README-checkpoint.md @@ -0,0 +1,56 @@ +![logo_ironhack_blue 7](https://user-images.githubusercontent.com/23629340/40541063-a07a0a8a-601a-11e8-91b5-2f13e4e6b441.png) + +# Lab | Hypothesis Testing + +
+ +

Learning Goals

+
+ + This exercise allows you to practice and apply the concepts and techniques taught in class. + + Upon completion of this exercise, you will be able to: + + - Apply different kinds of hypothesis testing, such as one sample, paired samples, and more + + +
+
+ +
+ +
+ +

Prerequisites

+
+Before starting this lab, you should have learnt about: + +- Basic probability concepts. +- The difference between distributions and where/how to apply them in a business context. +- Central Limit Theorem. +- Different kinds of hypothesis testing. + +
+
+ +
+ +## Introduction + +In this exercise, you will have the opportunity to dive into the fundamentals of hypothesis testing. + +
+ +**Happy coding!** :heart: + + + + +## Getting Started + +Complete the challenges in the notebook. Follow the instructions and add your code and explanations as necessary. + + +## Submission + +- Submit your solutions in the Student Portal. diff --git a/.ipynb_checkpoints/lab-hypothesis-testing-checkpoint.ipynb b/.ipynb_checkpoints/lab-hypothesis-testing-checkpoint.ipynb new file mode 100644 index 0000000..0cc26d5 --- /dev/null +++ b/.ipynb_checkpoints/lab-hypothesis-testing-checkpoint.ipynb @@ -0,0 +1,520 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Lab | Hypothesis Testing" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Objective**\n", + "\n", + "Welcome to the Hypothesis Testing Lab, where we embark on an enlightening journey through the realm of statistical decision-making! In this laboratory, we delve into various scenarios, applying the powerful tools of hypothesis testing to scrutinize and interpret data.\n", + "\n", + "From testing the mean of a single sample (One Sample T-Test), to investigating differences between independent groups (Two Sample T-Test), and exploring relationships within dependent samples (Paired Sample T-Test), our exploration knows no bounds. Furthermore, we'll venture into the realm of Analysis of Variance (ANOVA), unraveling the complexities of comparing means across multiple groups.\n", + "\n", + "So, grab your statistical tools, prepare your hypotheses, and let's embark on this fascinating journey of exploration and discovery in the world of hypothesis testing!" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Challenge 1**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this challenge, we will be working with pokemon data. 
The data can be found here:\n", + "\n", + "- https://raw.githubusercontent.com/data-bootcamp-v4/data/main/pokemon.csv" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "#libraries\n", + "import pandas as pd\n", + "import scipy.stats as st\n", + "import numpy as np\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
NameType 1Type 2HPAttackDefenseSp. AtkSp. DefSpeedGenerationLegendary
0BulbasaurGrassPoison4549496565451False
1IvysaurGrassPoison6062638080601False
2VenusaurGrassPoison808283100100801False
3Mega VenusaurGrassPoison80100123122120801False
4CharmanderFireNaN3952436050651False
....................................
795DiancieRockFairy50100150100150506True
796Mega DiancieRockFairy501601101601101106True
797Hoopa ConfinedPsychicGhost8011060150130706True
798Hoopa UnboundPsychicDark8016060170130806True
799VolcanionFireWater8011012013090706True
\n", + "

800 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " Name Type 1 Type 2 HP Attack Defense Sp. Atk Sp. Def \\\n", + "0 Bulbasaur Grass Poison 45 49 49 65 65 \n", + "1 Ivysaur Grass Poison 60 62 63 80 80 \n", + "2 Venusaur Grass Poison 80 82 83 100 100 \n", + "3 Mega Venusaur Grass Poison 80 100 123 122 120 \n", + "4 Charmander Fire NaN 39 52 43 60 50 \n", + ".. ... ... ... .. ... ... ... ... \n", + "795 Diancie Rock Fairy 50 100 150 100 150 \n", + "796 Mega Diancie Rock Fairy 50 160 110 160 110 \n", + "797 Hoopa Confined Psychic Ghost 80 110 60 150 130 \n", + "798 Hoopa Unbound Psychic Dark 80 160 60 170 130 \n", + "799 Volcanion Fire Water 80 110 120 130 90 \n", + "\n", + " Speed Generation Legendary \n", + "0 45 1 False \n", + "1 60 1 False \n", + "2 80 1 False \n", + "3 80 1 False \n", + "4 65 1 False \n", + ".. ... ... ... \n", + "795 50 6 True \n", + "796 110 6 True \n", + "797 70 6 True \n", + "798 80 6 True \n", + "799 70 6 True \n", + "\n", + "[800 rows x 11 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/pokemon.csv\")\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- We posit that Pokemons of type Dragon have, on average, more HP stats than Grass. Choose the propper test and, with 5% significance, comment your findings." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "#code here" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- We posit that Legendary Pokemons have different stats (HP, Attack, Defense, Sp.Atk, Sp.Def, Speed) when comparing with Non-Legendary. 
Choose the proper test and, with 5% significance, comment your findings.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "#code here" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Challenge 2**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this challenge, we will be working with california-housing data. The data can be found here:\n", + "- https://raw.githubusercontent.com/data-bootcamp-v4/data/main/california_housing.csv" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
longitudelatitudehousing_median_agetotal_roomstotal_bedroomspopulationhouseholdsmedian_incomemedian_house_value
0-114.3134.1915.05612.01283.01015.0472.01.493666900.0
1-114.4734.4019.07650.01901.01129.0463.01.820080100.0
2-114.5633.6917.0720.0174.0333.0117.01.650985700.0
3-114.5733.6414.01501.0337.0515.0226.03.191773400.0
4-114.5733.5720.01454.0326.0624.0262.01.925065500.0
\n", + "
" + ], + "text/plain": [ + " longitude latitude housing_median_age total_rooms total_bedrooms \\\n", + "0 -114.31 34.19 15.0 5612.0 1283.0 \n", + "1 -114.47 34.40 19.0 7650.0 1901.0 \n", + "2 -114.56 33.69 17.0 720.0 174.0 \n", + "3 -114.57 33.64 14.0 1501.0 337.0 \n", + "4 -114.57 33.57 20.0 1454.0 326.0 \n", + "\n", + " population households median_income median_house_value \n", + "0 1015.0 472.0 1.4936 66900.0 \n", + "1 1129.0 463.0 1.8200 80100.0 \n", + "2 333.0 117.0 1.6509 85700.0 \n", + "3 515.0 226.0 3.1917 73400.0 \n", + "4 624.0 262.0 1.9250 65500.0 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/california_housing.csv\")\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**We posit that houses close to either a school or a hospital are more expensive.**\n", + "\n", + "- School coordinates (-118, 34)\n", + "- Hospital coordinates (-122, 37)\n", + "\n", + "We consider a house (neighborhood) to be close to a school or hospital if the distance is lower than 0.50.\n", + "\n", + "Hint:\n", + "- Write a function to calculate euclidean distance from each house (neighborhood) to the school and to the hospital.\n", + "- Divide your dataset into houses close and far from either a hospital or school.\n", + "- Choose the propper test and, with 5% significance, comment your findings.\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": 
"python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/lab-hypothesis-testing.ipynb b/lab-hypothesis-testing.ipynb index 0cc26d5..192a909 100644 --- a/lab-hypothesis-testing.ipynb +++ b/lab-hypothesis-testing.ipynb @@ -46,16 +46,795 @@ "import pandas as pd\n", "import scipy.stats as st\n", "import numpy as np\n", - "\n" + "import matplotlib.pyplot as plt\n", + "import plotly.express as px\n", + "import statsmodels.api as sm\n", + "import seaborn as sns\n", + "\n", + "from statsmodels.multivariate.manova import MANOVA\n", + "from scipy.stats import pearsonr\n", + "from scipy import stats\n", + "from scipy.stats import boxcox" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "metadata": {}, "outputs": [ { "data": { + "application/vnd.microsoft.datawrangler.viewer.v0+json": { + "columns": [ + { + "name": "index", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Name", + "rawType": "object", + "type": "unknown" + }, + { + "name": "Type 1", + "rawType": "object", + "type": "string" + }, + { + "name": "Type 2", + "rawType": "object", + "type": "unknown" + }, + { + "name": "HP", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Attack", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Defense", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Sp. Atk", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Sp. 
Def", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Speed", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Generation", + "rawType": "int64", + "type": "integer" + }, + { + "name": "Legendary", + "rawType": "bool", + "type": "boolean" + } + ], + "ref": "82b4772a-3916-4e29-a750-c6f768230442", + "rows": [ + [ + "0", + "Bulbasaur", + "Grass", + "Poison", + "45", + "49", + "49", + "65", + "65", + "45", + "1", + "False" + ], + [ + "1", + "Ivysaur", + "Grass", + "Poison", + "60", + "62", + "63", + "80", + "80", + "60", + "1", + "False" + ], + [ + "2", + "Venusaur", + "Grass", + "Poison", + "80", + "82", + "83", + "100", + "100", + "80", + "1", + "False" + ], + [ + "3", + "Mega Venusaur", + "Grass", + "Poison", + "80", + "100", + "123", + "122", + "120", + "80", + "1", + "False" + ], + [ + "4", + "Charmander", + "Fire", + null, + "39", + "52", + "43", + "60", + "50", + "65", + "1", + "False" + ], + [ + "5", + "Charmeleon", + "Fire", + null, + "58", + "64", + "58", + "80", + "65", + "80", + "1", + "False" + ], + [ + "6", + "Charizard", + "Fire", + "Flying", + "78", + "84", + "78", + "109", + "85", + "100", + "1", + "False" + ], + [ + "7", + "Mega Charizard X", + "Fire", + "Dragon", + "78", + "130", + "111", + "130", + "85", + "100", + "1", + "False" + ], + [ + "8", + "Mega Charizard Y", + "Fire", + "Flying", + "78", + "104", + "78", + "159", + "115", + "100", + "1", + "False" + ], + [ + "9", + "Squirtle", + "Water", + null, + "44", + "48", + "65", + "50", + "64", + "43", + "1", + "False" + ], + [ + "10", + "Wartortle", + "Water", + null, + "59", + "63", + "80", + "65", + "80", + "58", + "1", + "False" + ], + [ + "11", + "Blastoise", + "Water", + null, + "79", + "83", + "100", + "85", + "105", + "78", + "1", + "False" + ], + [ + "12", + "Mega Blastoise", + "Water", + null, + "79", + "103", + "120", + "135", + "115", + "78", + "1", + "False" + ], + [ + "13", + "Caterpie", + "Bug", + null, + "45", + "30", + "35", + "20", + "20", + "45", + "1", + 
"False" + ], + [ + "14", + "Metapod", + "Bug", + null, + "50", + "20", + "55", + "25", + "25", + "30", + "1", + "False" + ], + [ + "15", + "Butterfree", + "Bug", + "Flying", + "60", + "45", + "50", + "90", + "80", + "70", + "1", + "False" + ], + [ + "16", + "Weedle", + "Bug", + "Poison", + "40", + "35", + "30", + "20", + "20", + "50", + "1", + "False" + ], + [ + "17", + "Kakuna", + "Bug", + "Poison", + "45", + "25", + "50", + "25", + "25", + "35", + "1", + "False" + ], + [ + "18", + "Beedrill", + "Bug", + "Poison", + "65", + "90", + "40", + "45", + "80", + "75", + "1", + "False" + ], + [ + "19", + "Mega Beedrill", + "Bug", + "Poison", + "65", + "150", + "40", + "15", + "80", + "145", + "1", + "False" + ], + [ + "20", + "Pidgey", + "Normal", + "Flying", + "40", + "45", + "40", + "35", + "35", + "56", + "1", + "False" + ], + [ + "21", + "Pidgeotto", + "Normal", + "Flying", + "63", + "60", + "55", + "50", + "50", + "71", + "1", + "False" + ], + [ + "22", + "Pidgeot", + "Normal", + "Flying", + "83", + "80", + "75", + "70", + "70", + "101", + "1", + "False" + ], + [ + "23", + "Mega Pidgeot", + "Normal", + "Flying", + "83", + "80", + "80", + "135", + "80", + "121", + "1", + "False" + ], + [ + "24", + "Rattata", + "Normal", + null, + "30", + "56", + "35", + "25", + "35", + "72", + "1", + "False" + ], + [ + "25", + "Raticate", + "Normal", + null, + "55", + "81", + "60", + "50", + "70", + "97", + "1", + "False" + ], + [ + "26", + "Spearow", + "Normal", + "Flying", + "40", + "60", + "30", + "31", + "31", + "70", + "1", + "False" + ], + [ + "27", + "Fearow", + "Normal", + "Flying", + "65", + "90", + "65", + "61", + "61", + "100", + "1", + "False" + ], + [ + "28", + "Ekans", + "Poison", + null, + "35", + "60", + "44", + "40", + "54", + "55", + "1", + "False" + ], + [ + "29", + "Arbok", + "Poison", + null, + "60", + "85", + "69", + "65", + "79", + "80", + "1", + "False" + ], + [ + "30", + "Pikachu", + "Electric", + null, + "35", + "55", + "40", + "50", + "50", + "90", + "1", + 
"False" + ], + [ + "31", + "Raichu", + "Electric", + null, + "60", + "90", + "55", + "90", + "80", + "110", + "1", + "False" + ], + [ + "32", + "Sandshrew", + "Ground", + null, + "50", + "75", + "85", + "20", + "30", + "40", + "1", + "False" + ], + [ + "33", + "Sandslash", + "Ground", + null, + "75", + "100", + "110", + "45", + "55", + "65", + "1", + "False" + ], + [ + "34", + "Nidoran♀", + "Poison", + null, + "55", + "47", + "52", + "40", + "40", + "41", + "1", + "False" + ], + [ + "35", + "Nidorina", + "Poison", + null, + "70", + "62", + "67", + "55", + "55", + "56", + "1", + "False" + ], + [ + "36", + "Nidoqueen", + "Poison", + "Ground", + "90", + "92", + "87", + "75", + "85", + "76", + "1", + "False" + ], + [ + "37", + "Nidoran♂", + "Poison", + null, + "46", + "57", + "40", + "40", + "40", + "50", + "1", + "False" + ], + [ + "38", + "Nidorino", + "Poison", + null, + "61", + "72", + "57", + "55", + "55", + "65", + "1", + "False" + ], + [ + "39", + "Nidoking", + "Poison", + "Ground", + "81", + "102", + "77", + "85", + "75", + "85", + "1", + "False" + ], + [ + "40", + "Clefairy", + "Fairy", + null, + "70", + "45", + "48", + "60", + "65", + "35", + "1", + "False" + ], + [ + "41", + "Clefable", + "Fairy", + null, + "95", + "70", + "73", + "95", + "90", + "60", + "1", + "False" + ], + [ + "42", + "Vulpix", + "Fire", + null, + "38", + "41", + "40", + "50", + "65", + "65", + "1", + "False" + ], + [ + "43", + "Ninetales", + "Fire", + null, + "73", + "76", + "75", + "81", + "100", + "100", + "1", + "False" + ], + [ + "44", + "Jigglypuff", + "Normal", + "Fairy", + "115", + "45", + "20", + "45", + "25", + "20", + "1", + "False" + ], + [ + "45", + "Wigglytuff", + "Normal", + "Fairy", + "140", + "70", + "45", + "85", + "50", + "45", + "1", + "False" + ], + [ + "46", + "Zubat", + "Poison", + "Flying", + "40", + "45", + "35", + "30", + "40", + "55", + "1", + "False" + ], + [ + "47", + "Golbat", + "Poison", + "Flying", + "75", + "80", + "70", + "65", + "75", + "90", + "1", + 
"False" + ], + [ + "48", + "Oddish", + "Grass", + "Poison", + "45", + "50", + "55", + "75", + "65", + "30", + "1", + "False" + ], + [ + "49", + "Gloom", + "Grass", + "Poison", + "60", + "65", + "70", + "85", + "75", + "40", + "1", + "False" + ] + ], + "shape": { + "columns": 11, + "rows": 800 + } + }, "text/html": [ "
\n", "