diff --git a/01_materials/labs/01_setup.ipynb b/01_materials/labs/01_setup.ipynb index 92feacca8..d1fea6a68 100644 --- a/01_materials/labs/01_setup.ipynb +++ b/01_materials/labs/01_setup.ipynb @@ -247,7 +247,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -257,7 +257,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -273,9 +273,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2026-01-14 21:07:16,806, 492669213.py, 3, INFO, Hello world!\n" + ] + } + ], "source": [ "from utils.logger import get_logger\n", "_logs = get_logger(__name__)\n", @@ -307,7 +315,7 @@ ], "metadata": { "kernelspec": { - "display_name": "production-env (3.11.13)", + "display_name": "production-env (3.11.14)", "language": "python", "name": "python3" }, @@ -321,7 +329,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.13" + "version": "3.11.14" } }, "nbformat": 4, diff --git a/01_materials/labs/04_transforms.ipynb b/01_materials/labs/04_transforms.ipynb index 614061fa2..cb45613ee 100644 --- a/01_materials/labs/04_transforms.ipynb +++ b/01_materials/labs/04_transforms.ipynb @@ -190,9 +190,1330 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
Pipeline(steps=[('preprocess',\n",
+       "                 ColumnTransformer(remainder='passthrough',\n",
+       "                                   transformers=[('numeric_simple',\n",
+       "                                                  Pipeline(steps=[('imputer',\n",
+       "                                                                   SimpleImputer(strategy='median')),\n",
+       "                                                                  ('standardizer',\n",
+       "                                                                   StandardScaler())]),\n",
+       "                                                  ['revolving_unsecured_line_utilization',\n",
+       "                                                   'age', 'num_30_59_days_late',\n",
+       "                                                   'debt_ratio',\n",
+       "                                                   'monthly_income',\n",
+       "                                                   'num_open_credit_loans',\n",
+       "                                                   'num_90_days_late',\n",
+       "                                                   'num_real_estate_loans',\n",
+       "                                                   'num_60_89_days_late',\n",
+       "                                                   'num_dependents'])])),\n",
+       "                ('model', LogisticRegression())])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "Pipeline(steps=[('preprocess',\n", + " ColumnTransformer(remainder='passthrough',\n", + " transformers=[('numeric_simple',\n", + " Pipeline(steps=[('imputer',\n", + " SimpleImputer(strategy='median')),\n", + " ('standardizer',\n", + " StandardScaler())]),\n", + " ['revolving_unsecured_line_utilization',\n", + " 'age', 'num_30_59_days_late',\n", + " 'debt_ratio',\n", + " 'monthly_income',\n", + " 'num_open_credit_loans',\n", + " 'num_90_days_late',\n", + " 'num_real_estate_loans',\n", + " 'num_60_89_days_late',\n", + " 'num_dependents'])])),\n", + " ('model', LogisticRegression())])" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from sklearn.pipeline import Pipeline\n", "from sklearn.compose import ColumnTransformer\n", @@ -415,7 +1736,7 @@ ], "metadata": { "kernelspec": { - "display_name": "production-env (3.11.13)", + "display_name": "production-env (3.11.14)", "language": "python", "name": "python3" }, @@ -429,7 +1750,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.13" + "version": "3.11.14" } }, "nbformat": 4, diff --git a/02_activities/assignments/assignment_2.ipynb b/02_activities/assignments/assignment_2.ipynb index 29d661c57..058691cf0 100644 --- a/02_activities/assignments/assignment_2.ipynb +++ b/02_activities/assignments/assignment_2.ipynb @@ -97,18 +97,68 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The dotenv extension is already loaded. To reload it, use:\n", + " %reload_ext dotenv\n" + ] + } + ], "source": [ - "# Load the libraries as required." 
+ "# Load the libraries as required.\n", + "\n", + "%load_ext dotenv\n", + "%dotenv\n" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], + "source": [ + "# Load the libraries as required.\n", + "from pathlib import Path\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 517 entries, 0 to 516\n", + "Data columns (total 13 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 coord_x 517 non-null int64 \n", + " 1 coord_y 517 non-null int64 \n", + " 2 month 517 non-null object \n", + " 3 day 517 non-null object \n", + " 4 ffmc 517 non-null float64\n", + " 5 dmc 517 non-null float64\n", + " 6 dc 517 non-null float64\n", + " 7 isi 517 non-null float64\n", + " 8 temp 517 non-null float64\n", + " 9 rh 517 non-null int64 \n", + " 10 wind 517 non-null float64\n", + " 11 rain 517 non-null float64\n", + " 12 area 517 non-null float64\n", + "dtypes: float64(8), int64(3), object(2)\n", + "memory usage: 52.6+ KB\n" + ] + } + ], "source": [ "# Load data\n", "columns = [\n", @@ -118,6 +168,451 @@ "fires_dt.info()\n" ] }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['coord_x', 'coord_y', 'month', 'day', 'ffmc', 'dmc', 'dc', 'isi',\n", + " 'temp', 'rh', 'wind', 'rain', 'area'],\n", + " dtype='object')" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fires_dt.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
coord_xcoord_ymonthdayffmcdmcdcisitemprhwindrainarea
075marfri86.226.294.35.18.2516.70.00.0
174octtue90.635.4669.16.718.0330.90.00.0
274octsat90.643.7686.96.714.6331.30.00.0
386marfri91.733.377.59.08.3974.00.20.0
486marsun89.351.3102.29.611.4991.80.00.0
\n", + "
" + ], + "text/plain": [ + " coord_x coord_y month day ffmc dmc dc isi temp rh wind rain \\\n", + "0 7 5 mar fri 86.2 26.2 94.3 5.1 8.2 51 6.7 0.0 \n", + "1 7 4 oct tue 90.6 35.4 669.1 6.7 18.0 33 0.9 0.0 \n", + "2 7 4 oct sat 90.6 43.7 686.9 6.7 14.6 33 1.3 0.0 \n", + "3 8 6 mar fri 91.7 33.3 77.5 9.0 8.3 97 4.0 0.2 \n", + "4 8 6 mar sun 89.3 51.3 102.2 9.6 11.4 99 1.8 0.0 \n", + "\n", + " area \n", + "0 0.0 \n", + "1 0.0 \n", + "2 0.0 \n", + "3 0.0 \n", + "4 0.0 " + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fires_dt.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
coord_xcoord_ymonthdayffmcdmcdcisitemprhwindrainarea
count517.000000517.000000517517517.000000517.000000517.000000517.000000517.000000517.000000517.000000517.000000517.000000
uniqueNaNNaN127NaNNaNNaNNaNNaNNaNNaNNaNNaN
topNaNNaNaugsunNaNNaNNaNNaNNaNNaNNaNNaNNaN
freqNaNNaN18495NaNNaNNaNNaNNaNNaNNaNNaNNaN
mean4.6692464.299807NaNNaN90.644681110.872340547.9400399.02166318.88916844.2882014.0176020.02166312.847292
std2.3137781.229900NaNNaN5.52011164.046482248.0661924.5594775.80662516.3174691.7916530.29595963.655818
min1.0000002.000000NaNNaN18.7000001.1000007.9000000.0000002.20000015.0000000.4000000.0000000.000000
25%3.0000004.000000NaNNaN90.20000068.600000437.7000006.50000015.50000033.0000002.7000000.0000000.000000
50%4.0000004.000000NaNNaN91.600000108.300000664.2000008.40000019.30000042.0000004.0000000.0000000.520000
75%7.0000005.000000NaNNaN92.900000142.400000713.90000010.80000022.80000053.0000004.9000000.0000006.570000
max9.0000009.000000NaNNaN96.200000291.300000860.60000056.10000033.300000100.0000009.4000006.4000001090.840000
\n", + "
" + ], + "text/plain": [ + " coord_x coord_y month day ffmc dmc dc \\\n", + "count 517.000000 517.000000 517 517 517.000000 517.000000 517.000000 \n", + "unique NaN NaN 12 7 NaN NaN NaN \n", + "top NaN NaN aug sun NaN NaN NaN \n", + "freq NaN NaN 184 95 NaN NaN NaN \n", + "mean 4.669246 4.299807 NaN NaN 90.644681 110.872340 547.940039 \n", + "std 2.313778 1.229900 NaN NaN 5.520111 64.046482 248.066192 \n", + "min 1.000000 2.000000 NaN NaN 18.700000 1.100000 7.900000 \n", + "25% 3.000000 4.000000 NaN NaN 90.200000 68.600000 437.700000 \n", + "50% 4.000000 4.000000 NaN NaN 91.600000 108.300000 664.200000 \n", + "75% 7.000000 5.000000 NaN NaN 92.900000 142.400000 713.900000 \n", + "max 9.000000 9.000000 NaN NaN 96.200000 291.300000 860.600000 \n", + "\n", + " isi temp rh wind rain \\\n", + "count 517.000000 517.000000 517.000000 517.000000 517.000000 \n", + "unique NaN NaN NaN NaN NaN \n", + "top NaN NaN NaN NaN NaN \n", + "freq NaN NaN NaN NaN NaN \n", + "mean 9.021663 18.889168 44.288201 4.017602 0.021663 \n", + "std 4.559477 5.806625 16.317469 1.791653 0.295959 \n", + "min 0.000000 2.200000 15.000000 0.400000 0.000000 \n", + "25% 6.500000 15.500000 33.000000 2.700000 0.000000 \n", + "50% 8.400000 19.300000 42.000000 4.000000 0.000000 \n", + "75% 10.800000 22.800000 53.000000 4.900000 0.000000 \n", + "max 56.100000 33.300000 100.000000 9.400000 6.400000 \n", + "\n", + " area \n", + "count 517.000000 \n", + "unique NaN \n", + "top NaN \n", + "freq NaN \n", + "mean 12.847292 \n", + "std 63.655818 \n", + "min 0.000000 \n", + "25% 0.000000 \n", + "50% 0.520000 \n", + "75% 6.570000 \n", + "max 1090.840000 " + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fires_dt.describe(include='all')\n" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -129,17 +624,136 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "dt = 
fires_dt\n", + "X = dt.drop(columns=['area'])\n", + "Y = 1*(dt[['area']] >= 7).values.ravel()" + ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 517 entries, 0 to 516\n", + "Data columns (total 12 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 coord_x 517 non-null int64 \n", + " 1 coord_y 517 non-null int64 \n", + " 2 month 517 non-null object \n", + " 3 day 517 non-null object \n", + " 4 ffmc 517 non-null float64\n", + " 5 dmc 517 non-null float64\n", + " 6 dc 517 non-null float64\n", + " 7 isi 517 non-null float64\n", + " 8 temp 517 non-null float64\n", + " 9 rh 517 non-null int64 \n", + " 10 wind 517 non-null float64\n", + " 11 rain 517 non-null float64\n", + "dtypes: float64(7), int64(3), object(2)\n", + "memory usage: 48.6+ KB\n" + ] + } + ], + "source": [ + "X.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,\n", + " 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,\n", + " 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0,\n", + " 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,\n", + " 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1,\n", + " 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0,\n", + " 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1,\n", + " 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0,\n", + " 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0,\n", + " 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0,\n", + " 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0])" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "1*(dt[['area']] >= 7).values.ravel()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['coord_x', 'coord_y', 'ffmc', 'dmc', 'dc', 'isi', 'temp', 'rh', 'wind',\n", + " 'rain'],\n", + " dtype='object')" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "numeric_features = X.select_dtypes(include=['int64', 'float64']).columns\n", + "numeric_features" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['month', 'day'], dtype='object')" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "categorical_features = X.select_dtypes(exclude=['int64', 'float64']).columns\n", + "categorical_features\n" + ] }, { "cell_type": "markdown", @@ -180,10 +794,4051 @@ }, { "cell_type": "code", - 
"execution_count": null, + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
StandardScaler()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "StandardScaler()" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Create a StandardScaler object\n", + "from sklearn.preprocessing import StandardScaler\n", + "std_scaler = StandardScaler()\n", + "\n", + "# Select only numeric features for scaling\n", + "numeric_features = X.select_dtypes(include=['int64', 'float64']).columns\n", + "X_num = X[numeric_features]\n", + "\n", + "# fit the StandardScaler object with the returns data\n", + "std_scaler.fit(X_num)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "coord_x 4.669246\n", + "coord_y 4.299807\n", + "ffmc 90.644681\n", + "dmc 110.872340\n", + "dc 547.940039\n", + "isi 9.021663\n", + "temp 18.889168\n", + "rh 44.288201\n", + "wind 4.017602\n", + "rain 0.021663\n", + "dtype: float64" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_num.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 37, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "# Transform the return data using the fitted scaler\n", + "\n", + "scaled_X = std_scaler.transform(X_num)\n", + "scaled_X_df = pd.DataFrame(scaled_X, columns=X_num.columns)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "scaled_X_df.mean" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "coord_x 2.313778\n", + "coord_y 1.229900\n", + "ffmc 5.520111\n", + "dmc 64.046482\n", + "dc 248.066192\n", + "isi 4.559477\n", + "temp 5.806625\n", + "rh 16.317469\n", + "wind 1.791653\n", + "rain 0.295959\n", + "dtype: float64" + ] + }, + "execution_count": 
43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_num.std()" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAigAAAHBCAYAAABDrkBSAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAALVJJREFUeJzt3QuczPX+x/HPbpt1idW67W4ti0oqi5RNdYooUUqcLlJRIirFVuQc5XIUpaSLy+lGipQuKkpHFMolFKKSFdnKpVOxLceG/f0fn+/jMfPf2V2X1czOZ3Zez8fj99iZ32929rszv/nN+/e9/WI8z/MEAADAkNhwFwAAAKAwAgoAADCHgAIAAMwhoAAAAHMIKAAAwBwCCgAAMIeAAgAAzCGgAAAAc+IkAuXn58vPP/8slStXlpiYmHAXBwAAHAGdG/aPP/6QlJQUiY2NLXsBRcNJampquIsBAACOQnZ2tpx44ollL6BozYnvH6xSpUq4iwMAAI5ATk6Oq2DwfY+XuYDia9bRcEJAAQAgshxJ9ww6yQIAAHMIKAAAwBwCCgAAMIeAAgAAzCGgAAAAcwgoAADAHAIKAAAwh4ACAADMIaAAAABzCCgAAMAcAgoAADCHgAIAAMwhoAAAAHMIKAAAwJw4KePS7p8d9OfcPOqyoD8nAAD4f9SgAAAAcwgoAADAHAIKAAAwh4ACAADMIaAAAABzCCgAAMAcAgoAADCHgAIAAMwhoAAAAHMIKAAAwBwCCgAAMIeAAgAAIj+gLFy4UDp06CApKSkSExMjM2fODNiu64pbRo8e7X9MWlpake2jRo0Kzn8EAACiL6Ds3r1bGjduLOPGjSt2+9atWwOWF1980QWQzp07Bzxu+PDhAY/r27fv0f8XAACgTIkr6S+0a9fOLQeTlJQUcP+dd96RVq1aSb169QLWV65cuchjAQAAQt4HZfv27TJ79mzp0aNHkW3apFOtWjVp2rSpa/7Zv38/7wgAADi6GpSSeOmll1xNSadOnQLW33XXXXLmmWdKYmKiLF68WAYNGuSaecaMGVPs8+Tl5bnFJycnJ5TFBgAAZTmgaP+Trl27Svny5QPWZ2Zm+m+np6dLuXLl5LbbbpORI0dKfHx8kefR9cOGDQtlUQEAQDQ08SxatEjWr18vt95662Efm5GR4Zp4Nm/eXOx2rWHZtWuXf8nOzg5BiQEAQJmvQXnhhRekWbNmbsTP4axatUpiY2OlZs2axW7XWpXialYAAEDZVOKAkpubK1lZWf77mzZtcgFD+5PUrl3b30dkxowZ8vjjjxf5/SVLlsiyZcvcyB7tn6L3+/fvLzfccIMcf/zxf/X/AQAA0RhQVqxY4cJF4f4k3bp1k8mTJ7vb06dPF8/zpEuXLkV+X2tCdPvQoUNdx9e6deu6gFKwXwoAAIhuMZ4miQijNTQJCQmuP0qVKlUO+di0+2cH/e9vHnVZ0J8z2OUMRRkBACit72+uxQMAAMwhoAAAAHMIKAAAwBwCCgAAMIeAAgAAzCGgAAAAcwgoAADAHAIKAAAwh4ACAADMIaAAAABzCCgAAMAcAgoAADCHgAIAAMwhoAAAAHMIKAAAwBwCCgAAMIeAAgAAzCGgAAAAcwgoAADAHAIKAAAwh4ACAADMIaAAAABzC
CgAAMAcAgoAADCHgAIAAMwhoAAAAHMIKAAAwBwCCgAAMIeAAgAAzCGgAAAAcwgoAADAHAIKAAAwh4ACAADMIaAAAABzCCgAAMAcAgoAADCHgAIAACI/oCxcuFA6dOggKSkpEhMTIzNnzgzY3r17d7e+4HLppZcGPOa3336Trl27SpUqVaRq1arSo0cPyc3N/ev/DQAAiM6Asnv3bmncuLGMGzfuoI/RQLJ161b/8uqrrwZs13Cybt06mTt3rsyaNcuFnl69eh3dfwAAAMqcuJL+Qrt27dxyKPHx8ZKUlFTstm+++UbmzJkjy5cvl7POOsute/rpp6V9+/by2GOPuZoZAAAQ3ULSB+WTTz6RmjVrSoMGDaRPnz7y66+/+rctWbLENev4wolq06aNxMbGyrJly4p9vry8PMnJyQlYAABA2RX0gKLNO1OmTJF58+bJI488IgsWLHA1LgcOHHDbt23b5sJLQXFxcZKYmOi2FWfkyJGSkJDgX1JTU4NdbAAAEMlNPIdz3XXX+W83atRI0tPTpX79+q5WpXXr1kf1nIMGDZLMzEz/fa1BIaQAAFB2hXyYcb169aR69eqSlZXl7mvflB07dgQ8Zv/+/W5kz8H6rWifFh3xU3ABAABlV8gDyo8//uj6oCQnJ7v7LVq0kJ07d8rKlSv9j5k/f77k5+dLRkZGqIsDAADKYhOPzlfiqw1RmzZtklWrVrk+JLoMGzZMOnfu7GpDNm7cKAMGDJCTTjpJ2rZt6x7fsGFD10+lZ8+eMnHiRNm3b5/ceeedrmmIETwAAOCoalBWrFghTZs2dYvSviF6+8EHH5RjjjlG1qxZI1dccYWccsopbgK2Zs2ayaJFi1wzjc/UqVPl1FNPdX1SdHjx+eefL88++yzvCAAAOLoalJYtW4rneQfd/uGHHx72ObSmZdq0aSX90wAAIEpwLR4AAGAOAQUAAJhDQAEAAOYQUAAAgDkEFAAAYA4BBQAAmENAAQAA5hBQAACAOQQUAABgDgEFAACYQ0ABAADmEFAAAIA5BBQAAGAOAQUAAJhDQAEAAOYQUAAAgDkEFAAAYA4BBQAAmENAAQAA5hBQAACAOQQUAABgDgEFAACYQ0ABAADmEFAAAIA5BBQAAGAOAQUAAJhDQAEAAOYQUAAAgDkEFAAAYA4BBQAAmENAAQAA5hBQAACAOQQUAABgDgEFAACYQ0ABAADmEFAAAIA5BBQAABD5AWXhwoXSoUMHSUlJkZiYGJk5c6Z/2759+2TgwIHSqFEjqVSpknvMTTfdJD///HPAc6SlpbnfLbiMGjUqOP8RAACIvoCye/duady4sYwbN67Itj179sgXX3whDzzwgPv51ltvyfr16+WKK64o8tjhw4fL1q1b/Uvfvn2P/r8AAABlSlxJf6Fdu3ZuKU5CQoLMnTs3YN0zzzwjzZs3ly1btkjt2rX96ytXrixJSUlHU2YAAFDGhbwPyq5du1wTTtWqVQPWa5NOtWrVpGnTpjJ69GjZv3//QZ8jLy9PcnJyAhYAAFB2lbgGpST27t3r+qR06dJFqlSp4l9/1113yZlnnimJiYmyePFiGTRokGvmGTNmTLHPM3LkSBk2bFgoiwoAAKIhoGiH2WuuuUY8z5MJEyYEbMvMzPTfTk9Pl3Llysltt93mgkh8fHyR59IAU/B3tAYlNTU1VEUHAABlMaD4wskPP/wg8+fPD6g9KU5GRoZr4tm8ebM0aNCgyHYNLcUFFwAAUDbFhSqcbNiwQT7++GPXz+RwVq1aJbGxsVKzZs1gFwcAAERDQMnNzZWsrCz//U2bNrmAof1JkpOT5e9//7sbYjxr1iw5cOCAbNu2zT1Ot2tTzpIlS2TZsmXSqlUrN5JH7/fv319uuOEGOf7444P73wEAgOgIKCtWrHDhwsfXN6Rbt24ydOhQeffdd939Jk2aBPye1qa0bNnSNdVMnz7dPVZH59StW9cFlIJ9TAAAQHQrcUDRkKEdXw/mUNuUjt5Zu
nRpSf8sAACIIlyLBwAAmENAAQAA5hBQAACAOQQUAABgDgEFAACYQ0ABAADmEFAAAIA5BBQAAGAOAQUAAJhDQAEAAOYQUAAAgDkEFAAAYA4BBQAAmENAAQAA5hBQAACAOQQUAABgDgEFAACYQ0ABAADmEFAAAIA5BBQAAGAOAQUAAJhDQAEAAOYQUAAAgDkEFAAAYA4BBQAAmENAAQAA5hBQAACAOQQUAABgDgEFAACYQ0ABAADmEFAAAIA5BBQAAGAOAQUAAJhDQAEAAOYQUAAAgDkEFAAAYA4BBQAARH5AWbhwoXTo0EFSUlIkJiZGZs6cGbDd8zx58MEHJTk5WSpUqCBt2rSRDRs2BDzmt99+k65du0qVKlWkatWq0qNHD8nNzf3r/w0AAIjOgLJ7925p3LixjBs3rtjtjz76qDz11FMyceJEWbZsmVSqVEnatm0re/fu9T9Gw8m6detk7ty5MmvWLBd6evXq9df+EwAAUGbElfQX2rVr55biaO3J2LFjZfDgwXLllVe6dVOmTJFatWq5mpbrrrtOvvnmG5kzZ44sX75czjrrLPeYp59+Wtq3by+PPfaYq5kBAADRLah9UDZt2iTbtm1zzTo+CQkJkpGRIUuWLHH39ac26/jCidLHx8bGuhqX4uTl5UlOTk7AAgAAyq6gBhQNJ0prTArS+75t+rNmzZoB2+Pi4iQxMdH/mMJGjhzpgo5vSU1NDWaxAQCAMREximfQoEGya9cu/5KdnR3uIgEAgEgJKElJSe7n9u3bA9brfd82/bljx46A7fv373cje3yPKSw+Pt6N+Cm4AACAsiuoAaVu3bouZMybN8+/TvuLaN+SFi1auPv6c+fOnbJy5Ur/Y+bPny/5+fmurwoAAECJR/HofCVZWVkBHWNXrVrl+pDUrl1b+vXrJyNGjJCTTz7ZBZYHHnjAjczp2LGje3zDhg3l0ksvlZ49e7qhyPv27ZM777zTjfBhBA8AADiqgLJixQpp1aqV/35mZqb72a1bN5k8ebIMGDDAzZWi85poTcn555/vhhWXL1/e/ztTp051oaR169Zu9E7nzp3d3CkAAAAqxtPJSyKMNhvpaB7tMHu4/ihp988O+t/fPOqyoD9nsMsZijICAFBa398RMYoHAABEFwIKAAAwh4ACAADMIaAAAABzCCgAAMAcAgoAADCHgAIAAMwhoAAAAHMIKAAAwBwCCgAAMIeAAgAAzCGgAAAAcwgoAADAHAIKAAAwh4ACAADMIaAAAABzCCgAAMAcAgoAADCHgAIAAMwhoAAAAHMIKAAAwBwCCgAAMIeAAgAAzCGgAAAAcwgoAADAHAIKAAAwh4ACAADMIaAAAABzCCgAAMAcAgoAADCHgAIAAMwhoAAAAHMIKAAAwBwCCgAAMIeAAgAAzCGgAAAAcwgoAACg7AeUtLQ0iYmJKbLccccdbnvLli2LbOvdu3ewiwEAACJYXLCfcPny5XLgwAH//bVr18rFF18sV199tX9dz549Zfjw4f77FStWDHYxAABABAt6QKlRo0bA/VGjRkn9+vXlwgsvDAgkSUlJwf7TAACgjAhpH5Q///xTXnnlFbnllltcU47P1KlTpXr16nLGGWfIoEGDZM+ePYd8nry8PMnJyQlYAABA2RX0GpSCZs6cKTt37pTu3bv7111//fVSp04dSUlJkTVr1sjAgQNl/fr18tZbbx30eUaOHCnDhg0LZVEBAEC0BJQXXnhB2rVr58KIT69evfy3GzVqJMnJydK6dWvZuHGjawoqjtayZGZm+u9rDUpqamooiw4AAMpiQPnhhx/ko48+OmTNiMrIyHA/s7KyDhpQ4uPj3QIAAKJDyPqgTJo0SWrWrCmXXXbZIR+3atUq91NrUgAAAEJWg5Kfn+8CSrdu3SQu7v//hDbjTJs2Tdq3by/VqlVzfVD69+8vF1xwgaSnp/OOAACA0AUUbdrZsmWLG71TULly5dy2sWPHyu7du10/ks6dO8vgwYNDU
QwAABChQhJQLrnkEvE8r8h6DSQLFiwIxZ8EAABlCNfiAQAA5hBQAACAOQQUAABgDgEFAACYQ0ABAADmEFAAAIA5BBQAAGAOAQUAAJhDQAEAAOYQUAAAgDkEFAAAYA4BBQAAmENAAQAA5hBQAACAOQQUAABgDgEFAACYQ0ABAADmEFAAAIA5BBQAAGAOAQUAAJhDQAEAAOYQUAAAgDkEFAAAYA4BBQAAmENAAQAA5hBQAACAOQQUAABgDgEFAACYQ0ABAADmEFAAAIA5BBQAAGAOAQUAAJhDQAEAAObEhbsAiBxp988O+nNuHnVZ0J8TABD5qEEBAADmEFAAAEDZDyhDhw6VmJiYgOXUU0/1b9+7d6/ccccdUq1aNTnuuOOkc+fOsn379mAXAwAARLCQ1KCcfvrpsnXrVv/y6aef+rf1799f3nvvPZkxY4YsWLBAfv75Z+nUqVMoigEAACJUSDrJxsXFSVJSUpH1u3btkhdeeEGmTZsmF110kVs3adIkadiwoSxdulTOOeecUBQHAABEmJDUoGzYsEFSUlKkXr160rVrV9myZYtbv3LlStm3b5+0adPG/1ht/qldu7YsWbIkFEUBAAARKOg1KBkZGTJ58mRp0KCBa94ZNmyY/O1vf5O1a9fKtm3bpFy5clK1atWA36lVq5bbdjB5eXlu8cnJyQl2sQEAQFkOKO3atfPfTk9Pd4GlTp068vrrr0uFChWO6jlHjhzpgg4AAIgOIR9mrLUlp5xyimRlZbl+KX/++afs3Lkz4DE6iqe4Pis+gwYNcv1XfEt2dnaoiw0AAMpyQMnNzZWNGzdKcnKyNGvWTI499liZN2+ef/v69etdH5UWLVoc9Dni4+OlSpUqAQsAACi7gt7Ec++990qHDh1cs44OIR4yZIgcc8wx0qVLF0lISJAePXpIZmamJCYmuqDRt29fF04YwQMAAEIWUH788UcXRn799VepUaOGnH/++W4Isd5WTzzxhMTGxroJ2rTja9u2bWX8+PHBLgYAAIhgQQ8o06dPP+T28uXLy7hx49wCAABQHK7FAwAAzCGgAAAAcwgoAADAHAIKAAAwh4ACAADMIaAAAABzCCgAAMAcAgoAADCHgAIAAMwhoAAAAHMIKAAAwBwCCgAAMIeAAgAAzCGgAAAAcwgoAADAHAIKAAAwh4ACAADMIaAAAABzCCgAAMAcAgoAADCHgAIAAMwhoAAAAHMIKAAAwBwCCgAAMIeAAgAAzCGgAAAAcwgoAADAHAIKAAAwJy7cBQCCKe3+2UF/QTePuizozwkAODRqUAAAgDkEFAAAYA4BBQAAmENAAQAA5hBQAACAOQQUAABgDgEFAACYQ0ABAABlP6CMHDlSzj77bKlcubLUrFlTOnbsKOvXrw94TMuWLSUmJiZg6d27d7CLAgAAIlTQA8qCBQvkjjvukKVLl8rcuXNl3759cskll8ju3bsDHtezZ0/ZunWrf3n00UeDXRQAABChgj7V/Zw5cwLuT5482dWkrFy5Ui644AL/+ooVK0pSUlKw/zwAACgDQt4HZdeuXe5nYmJiwPqpU6dK9erV5YwzzpBBgwbJnj17Ql0UAAAQIUJ6scD8/Hzp16+fnHfeeS6I+Fx//fVSp04dSUlJkTVr1sjAgQNdP5W33nqr2OfJy8tzi09OTk4oiw0AAMpyQNG+KGvXrpVPP/00YH2vXr38txs1aiTJycnSunVr2bhxo9SvX7/YjrfDhg0LZVEBAEA0NPHceeedMmvWLPn444/lxBNPPORjMzIy3M+srKxit2sTkDYV+Zbs7OyQlBkAAJTRGhTP86Rv377y9ttvyyeffCJ169Y97O+sWrXK/dSalOLEx8e7BQAARIe4UDTrTJs2Td555x03F8q2bdvc+oSEBKlQoYJrxtHt7du3l2rVqrk+KP3793cjfNLT04NdHAAAEIGCHlAmTJjgn4ytoEmTJkn37t2lXLly8tFHH8nYsWPd3CipqanSuXNnGTx4c
LCLAgAAIlRImngORQOJTuYGAABwMFyLBwAAmENAAQAA5hBQAABAdE3UBqB4affPDupLs3nUZbzUAMoUalAAAIA5BBQAAGAOAQUAAJhDQAEAAOYQUAAAgDmM4gFQKiONFKONABwpalAAAIA5BBQAAGAOAQUAAJhDHxQAEYt+MkDZRQ0KAAAwh4ACAADMIaAAAABzCCgAAMAcAgoAADCHgAIAAMwhoAAAAHMIKAAAwBwCCgAAMIeAAgAAzGGqewCIsCn5N4+6TIKNywbAGmpQAACAOQQUAABgDgEFAACYQ0ABAADmEFAAAIA5BBQAAGAOAQUAAJhDQAEAAOYQUAAAgDkEFAAAYA4BBQAAmENAAQAA5oQ1oIwbN07S0tKkfPnykpGRIZ9//nk4iwMAAKL9asavvfaaZGZmysSJE104GTt2rLRt21bWr18vNWvWDFexAABGRcoVlyPh6tWRIGwBZcyYMdKzZ0+5+eab3X0NKrNnz5YXX3xR7r///nAVCwCAMi8tAsJeWALKn3/+KStXrpRBgwb518XGxkqbNm1kyZIlRR6fl5fnFp9du3a5nzk5OYf9W/l5eyTYjuTvllSwyxkJZQxFOSOhjIr32+brqHi/7b6WvN+R/1r6HuN53uGf0AuDn376SUvmLV68OGD9fffd5zVv3rzI44cMGeIez8JrwD7APsA+wD7APiAR/xpkZ2cfNiuErYmnJLSmRfur+OTn58tvv/0m1apVk5iYmKD8DU11qampkp2dLVWqVBGLKCOvpTWRsE9GSjkpI69lNOyTnufJH3/8ISkpKYd9bFgCSvXq1eWYY46R7du3B6zX+0lJSUUeHx8f75aCqlatGpKy6Ztg9QDmQxl5La2JhH0yUspJGXkty/o+mZCQYHeYcbly5aRZs2Yyb968gFoRvd+iRYtwFAkAABgStiYebbLp1q2bnHXWWdK8eXM3zHj37t3+UT0AACB6hS2gXHvttfLLL7/Igw8+KNu2bZMmTZrInDlzpFatWmEpjzYhDRkypEhTkiWUkdfSmkjYJyOlnJSR19Ka+DB/bmK0p2xY/jIAAMBBcC0eAABgDgEFAACYQ0ABAADmEFAAAIA5BBQAAGBOREx1DxyNd99994gfe8UVV/AiH6GdO3fK559/Ljt27HATLBZ00003mXkdFy5cKOeee67ExQUe5vbv3y+LFy+WCy64QMLpwIED8tlnn0l6enrIZsYOBr04q5Y1MTExYL1ebkRfWysz8+q+mJWVVex+Ge73GkeHYcYR4Pfff5cXXnhBvvnmG3e/YcOGcssttxQ5YITLvn375NRTT5VZs2a5slmhV8g+Eno9Jz0AW6DlePvttwPe644dOxb5kg2X9957T7p27Sq5ubnui6ngtbD0tn5pWaGX09i6davUrFkzYP2vv/7q1ll4z8uXL+/e67p164pV7dq1kw4dOsjtt98esH7ixInuJOD999+XcFu6dKlcf/318sMPPxS5Sq6lzzdKJqqbeI4//nj3JV940YsQnnDCCXLhhRfKpEmTwn4WqAevp556ygUVXZ5++mm3TrdZcOyxx8revXvFGj2LOpLFysFr3bp1csopp7gZljWk6NK9e3c5+eSTZe3atWLBPffc48KxBhStSfHtk7pYCidKv6iKu5ioBpRKlSqJBWeccYZ8//33YtmyZcukVatWRda3bNnSbbOgd+/eblZy/Zzofmh1v9TvlClTpsj//vc/seykk06SoUOHynfffRfWckR1DcoTTzwhDz30kDtD0On2lVZd64y2/fv3l02bNsnLL7/sAkHPnj3DUsZGjRq56xNNmDDBnREq/ULVsxmtpv7qq6/EgocfftjtzM8//7yZs/1Io+9zjRo15KWXXnLhWekBVkOKzrqs73e46Re77nP16tUTqzp16uR+vvPOO3LppZcGzIKpn501a9ZIgwYN3Oc83LQMerX2f/3rX+76ZIWDk4XmEy2T1lDosagg3Q8yMjJkz549YqGMq1evd
l+slvXr10+mTZsmeXl5cs0110iPHj3knHPOEYvfjdOmTZMvvvhCzjzzTLnhhhvc7O/FXcw3lKI6oHTu3Fkuvvhil74L+ve//y3/+c9/5M0333Th5Nlnnw1bEKhQoYKsWrXKHVALWr9+vbs8gJUkftVVV7mLPR533HHuQFb4QPvWW29JOA0fPvyQ2/WSC+Gm7/WKFSvk9NNPD1ivZ4Vnn322ifdav/yvu+46d3C1ync9Lw16Wk59XQteqDQtLc2dcOhV1S01Qxas7fHV/lio3dPaE63p0WNhQXfccYcLe4sWLZJwu+iii2TAgAEukFqnfaC0aUz3zw8++MCFKq2VvPHGG8N2qZeD0ZPOqVOnyquvvupO2HVf0LBSWn3Nojqg6JepfvkXTt3a0Uq//LUae+PGja4Tm17IMBzOO+88ue+++1w/hIJmzpwpo0aNcmc2FhzuIo/hbipr2rRpkX4z+oHT2p769eu7M4Vwa9y4sTtz0YNtQfPnz5e7777bRG2Z9oXSsKfvtwZRbd6z2tl42LBh7rNTsWJFsWrBggWHbRIIN+3I26ZNGxeSW7du7dbpycjy5cvdidzf/va3cBfRNYcOHjzYvd/F7Zd6DLdIO/TqCbDW5GsYbd++vdx1111FjgEW6HdNnz59XCgtteDsRbHU1FRvzJgxRdbrOt2mVq9e7dWqVcsLl+nTp3u1a9f2Ro8e7S1atMgtejstLc1t0/L5FpTMrl27vKuuusqbMmVKWMvgW2bPnu2dfvrp3owZM7zs7Gy36O1GjRq5bRbExMQcdImNjfUs+f77773vvvuuyHpdt2nTprCUKVJ9+eWXXpcuXbzTTjvNa9asmXfzzTcX+9qGy8H2R4v7pc+yZcu83r17e1WrVnXH+AcffNDr0aOHV6FCBe+ee+7xLJXz7rvv9pKSkryKFSt61157ban97aiuQXnuuedcItTU6uuDomcF2itde6hr++Djjz/u+qW89tprJkeiaDWwpergSKO1EjpCYfPmzWF7fwtX7SvfuoL3eX9LRmsftOpcOx0X9Morr7i+Up988olYHKV32mmnuRoqK6P0IoGO3jmUOnXqiJUaE+3XqDXKGzZscMeeW2+9Vdq2bev/zH/66aeuqSo3N9dM047W6OjoPW3i1ZaH0hLVAcVXffnMM8+4Ph1K+3r07dvXzZ8QCR88Sx/CN954Q15//XXZsmWL/PnnnwHbLDShFEcPBnqQ0C8Ji1X81qr7I4l2MNX9rrgmXB3xoaOQwk1H4un+l5CQ4MqkVq5c6cqmQ7qtzN+hTd36paojjsaOHeuGaWv/idq1axfpM4WD0z5Q2qSswVk7v2un+MJycnLkyiuvlI8//jhsL6WeOGmTng7d1j5n4eobE/UBBcGhw6D/+c9/ug+dtqnqGaAe1LRGSjvTaRtruMtXkOZynSNDz2b0i197rKNsdDb20S99rSUp3P9IA4AOkf3jjz8k3CJhlJ6GaB3pqP3hNFBpTY+O4tI+cNqpW09MrPj666+LPUGy0jdKOxRb6LNzOFq7o9MbhFtUBxTdkQ9Fzw4s0C9RbXLSqrYlS5a4mhI9i9G5UDRpW6ATtQ0ZMkS6dOkilStXdkP+9CCmX1g6D4HWUoVT4Ymw9AxBz1606lKHeWqZrRzAdBSZnqnOmDHDzcej77+W//zzzw938SKis7GP1kzoCB6tpi745a/DJbXTu9YAhFskjNLTAHX11VdLZmZmwGdbm761yv/HH38MdxHd50VHEmqg8zV7K1+zCc2jR0fDfMGmRx1yXKq8KObrQHWwxYLx48d71atX90aMGOE6T23cuNGtnzRpkteyZUvPCi3b5s2b3e0aNWp4q1atcre1I11iYmKYSxcZ3njjDfc63nrrrV58fLz/vX766ae9du3aeVZZ6GxcnHXr1nnVqlXz6tev73Xv3t0telv3z6+++sqz4Nxzz/XefvvtIut1XUZGhmdBpUqVXIdjddxxx/n3S+1orPupBZdffrl35
ZVXer/88osr49dff+0GFDRv3txbuHChZ4l2fL/66qvd+9u0adOAxYrt27e77xf9jjz++OPdorcvuugib8eOHaVWjqieSfbLL790Z3y+RWdF1JoKnc1Tz14t0LkHtDOvNp/4zgKVtldbqP710Ql8fDM2as2Tb/iznl2Hq5JOz+60Pdd3+1CLjut/5JFH3HVHwmXEiBFu/9P3u+AwSa1at1QzUVxfDx3S+8ADD4glesanQyK1xkQ7J2qTjr7P3377rZvXwwIdUqpDyB977DHXH0oXva0TReqi5fct4aLXCdLm0OKOn1rDZ4HWLGvzo85to7WjumiN48iRI91rbIU2NWvzt/bp0NdPB2fozOVaA6TNaFb07dvXddLV2a31uK6Lzsekx9PSfD2jespPnXeiMP3iT0lJkdGjR/tnpAwn/YIvXK2udHbMcM3NUhxtKtHJh7Ss+gHUg6u2TWsbdbheR+2D4Kvi1duHojM7ajjQTtMluchgMGm1fnGdIrXsFjp0HooGu3CGu4PROVB0NExycrK7ryMQCgb9cNMmUaWTjBW3zcIoPe0kOXDgQHfSpuXQy0Po5+Tee+81c3FIfW18zbQaUn7++WfXbKbN4b4BEBaMHz/e9dHT93by5MnufS/YFG7FnDlz5KOPPgq4tpoG/nHjxskll1xSauWI6oByMLpja+dOC7TvgbZRFx6hozuQpQvz6YfOdwVR7RSrBwk9iGnntMIz9ZaWgpPDHclEcdrBTnuuh7MWSkeY6EynBelZtZWp5Q/V2djSGaDScKzDN7Wfh28aAZ0ITy/LoBOMlXp7+kFOQKzT10s/06mpqS4I6BeVzoaqw051cjQLtEZM+8bo8VKn33/00UfdiBk9Lln57Pj6PfpGiOp+6euorbPI6pT34e6r56PH8sKT3SldV/hK0aEU1QHFV/1f+GCrF0my0INZacc0PTjoxfi0fNoxTTv9adWlzuVghVapas95bYrQ6nT98Onsk74wpR0WIyGYhvN6Nzr9ulb3v/jii+5MVc8Ctepaz1StNJ/oF3xxnY11rhHtbGyJ1uJpQNYmM9/1ofSLVeed0GuiWLjYpu/Eo7jRJ7oPWPjc6Be9voZ6lq/Nylr1rzWlVo6RSoOSr0ZZmxv1ddPRMtp8Mn36dLHWFK7vu68pXGvyw9kUfrAacT0W6XeNtiion376yX2mfLMJl4aoHsVTeJIspS+HninoTq291y3QCXM0NOmwXaXtvnpfJ5KzQkOIngXolWILY5Kxg9O+BXr255uQT4dja/j0XYBNm/I0oOjF5FAyGpK1nV9HmBWkYUCbci1c5M7q6BM9MTpSY8aMEYs0COhFN4u7onW4aDjW7xcd8ajNJTo1v/Yx8zWF64R9FmRnZ7twr31QtLxKA7QOi9cm8BNPPLFUyhHVAaXwJFm+s0Gd2MnKFXl1mKG+RdqWrgdU7aikTSdazarV11bo2ZS2TepZlrULXlmm/SG01k4nvtKqaG1a1LZ0berRM1V9n0tz5sbi6IFT28u1M+zh+hNpWXXiLm3WO1y/n1DT/VCbngq3mX/44Yeu78T27dsl3PRMX/cBrQ3V5gntqK9frPfcc4/rLBuuOTP0onAFac2o1j75hkPrTKNabr0Cs14rKhwicb/U5hFdfN8vOkO5Hs/1+KllK65ZJVz0e0evueQbZqxdCny14qXFxrdwmPhm5ixYvaoziuqHz8rkPjrPiX74dOfV8mmZdCf+73//685cdKp+C/Rgr2ddhJOSj5DQ6l0NKDrdvh68tEpdg4kVkdbZ2EdH72gto37R+9r9tVx61urrnBpu2oSnX/C+0Sf6pV9w9InWAIVDwVlM9TijoVmvvqs1EkqPk9oZPpyTjkXifhlJTeHz5893i5ZTj0u6L/omtNRm6FLhRTEdz9+4ceOAC0sVnBvFAp3HYe3ate72c88956Wnp3sHDhzwXn/9de/UU0/1rNCLhz3//PPhLkbE6dmzp5tLQi/+qPucXjSsbt26x
S6RQucf0YuKhVteXp531113eeXKlfN/pvW17tevn7d3717PAr1QnG+OkXr16nnz5893t7OystycOBakpKT4j0EF6VwyycnJXqSwsF9+8MEHbl4r6xfbHDp0qCuPziOj88t07NgxYCktUV2Dop2AdMSEDqcqrnrVAm3W8Q2f05EHWpuiKVx7fJfkOj2hpr3PdbZJnQm1uMudW5qLwBIdZaDvqTbp6GukHWWtzGobqZ2NfbQm6sknn3S1Eb7+WzrbrTaXWhEJo090MMEvv/xSZL2us3C5gEjaL3V+kWuuucZ8U/jEiRNd85n2KwwrL4pp7cTq1avd7SpVqnjffvutuz1v3jyvSZMmngWNGjXynnzySW/Lli2ujIsXL3brV6xY4dWqVcuzQmtP4uLi3CyOderUcTUCviWSzv7DSWc6zcnJCXcxUIrmzJnjvfnmm+72hg0bvAYNGrizaT3L1uOQBTfeeKP7HGs5s7Oz3aKzHuvn+qabbgp38SJK5cqVXe2YdYmJiSbKGdUBJRKqV3Va5GOPPdZVt1188cX+9Q8//LB36aWXelZoWHrooYdc8xOAo/frr796+fn5Zl7C3bt3e3369HHNY76mMm0203W5ubnhLl5EiZSm8AEDBnjDhw8PdzG8qB7Fox28tDmnY8eO7rLS2vFLx9Nr9apeJElHzFiwbds2N9JDx8v7hqPqfCjae73wEMpw0dk6dQSKVqEDKHt0npGCTWWVKlUKd5EijjbZa1O4jha13BR+9913y5QpUyQ9Pd0thctZWkPLozqg6HBD/dD5+gBcfvnlbgSPTu6jw790shocGZ3ARz90//jHP3jJAKAYOs+JjsgsX768+54pOEeL3tZ5cSxoVWiYeUFaztIaWh7VASVSJveJBJr8NXFrLU84EzcAWKUzyeqx8v777/fXhuPgCCgoU4kbAKyiKbxkCCgAAJQCmsJLJqrnQQEAoLTotZV0rhvt/0hT+OFRgwIAQCmgKbxkCCgAAMAcuhEDAABzCCgAAMAcAgoAADCHgAKgzBg6dKg0adIk3MUAEAQEFAARSScAnDlzZriLASBECCgAAMAcAgqAv6Rly5bSt29f6devn7uOVa1ateS5555zF+K8+eabpXLlynLSSSfJBx984P+dBQsWSPPmzSU+Pl6Sk5PdtUn2798f8Jx6zZIBAwa46cH1GibafOOTlpbmfl511VWuJsV33+fll1926xISEuS6666TP/74g3cZiDAEFAB/2UsvvSTVq1eXzz//3IWVPn36uMvKn3vuufLFF1/IJZdcIjfeeKO73PxPP/0k7du3l7PPPltWr14tEyZMcFd5HTFiRJHnrFSpkixbtszNvjl8+HCZO3eu27Z8+XL3c9KkSbJ161b/fbVx40bX9DNr1iy3aBgaNWoU7zIQYZioDcBforUdOoX3okWL3H29rTUXnTp1cle4Vtu2bXM1JUuWLJH33ntP3nzzTfnmm2/8Vw0fP368DBw4UHbt2uWu8lr4OZXWuFx00UX+sKG/+/bbb0vHjh39j9FaltGjR7u/pzU3SmthFi5cKEuXLuWdBiIINSgA/jK9rojPMcccI9WqVZNGjRr512mzj9qxY4cLJi1atPCHE3XeeedJbm6u/Pjjj8U+p9KAo79/ONq04wsnJfk9ALYQUAD8Zccee2zAfQ0fBdf5wkh+fv5fes4j+f2j/T0AthBQAJSqhg0buqYez/P86z777DNX63HiiSce8fNoENFmIABlEwEFQKm6/fbbJTs723Wm/fbbb+Wdd96RIUOGSGZmput/cqS0KWfevHmuv8nvv/8e0jIDKH0EFACl6oQTTpD333/fjfhp3Lix9O7dW3r06CGDBw8u0fM8/vjjblRPamqqNG3aNGTlBRAejOIBAADmUIMCAADMIaAAAABzCCgAAMAcAgoAADCHgAIAAMwhoAAAAHMIKAAAwBwCCgAAMIeAAgAAzCGgAAAAcwgoAADAHAIKAAAQa/4PY6F4ZI/Nk
MEAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# one hot encorder\n", + "X['month'].value_counts().plot(kind='bar')" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 mar\n", + "1 oct\n", + "2 oct\n", + "3 mar\n", + "4 mar\n", + " ... \n", + "512 aug\n", + "513 aug\n", + "514 aug\n", + "515 aug\n", + "516 nov\n", + "Name: month, Length: 517, dtype: object" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X['month']" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 fri\n", + "1 tue\n", + "2 sat\n", + "3 fri\n", + "4 sun\n", + " ... \n", + "512 sun\n", + "513 sun\n", + "514 sun\n", + "515 sat\n", + "516 tue\n", + "Name: day, Length: 517, dtype: object" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X['day']" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAh8AAAHBCAYAAADJgdkTAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjgsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvwVt1zgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAH/lJREFUeJzt3QeUVdW5B/BvkKYIKkZaBLFjL6CIGmOUJVFs0ViWZtm7UcHEQmzPChpbbFiiKPaYRGxLNAtiC9hAjcaCDZloAKPCKAgYmbf2ee9OmAELyuwp9/db66y599zLzOHMnXv/Z+9v711RXV1dHQAAmbTI9YMAAIQPACA7LR8AQFbCBwCQlfABAGQlfAAAWQkfAEBWLaORmT9/fnzwwQfRvn37qKioaOjDAQC+hTRt2KeffhrdunWLFi1aNK3wkYJH9+7dG/owAIDvoLKyMlZeeeWmFT5Si0fp4Dt06NDQhwMAfAtVVVVF40Hpc7xJhY9SV0sKHsIHADQt36ZkQsEpAJCV8AEAZCV8AABZCR8AQFbCBwCQlfABAGQlfAAAWQkfAEBWwgcAkJXwAQBkJXwAAFkJHwBAVsIHAJCV8AEAZCV8AABZtYxmrOepD0VjMXnYwIY+BABoFLR8AABZCR8AQFbCBwCQlfABAGQlfAAAWQkfAEBWwgcAkJXwAQBkJXwAAFkJHwBAVsIHAJCV8AEAZCV8AABZCR8AQFbCBwCQlfABAGQlfAAAWQkfAEBWwgcAkJXwAQBkJXwAAFkJHwBAVsIHAJCV8AEAZCV8AABZCR8AQFbCBwCQlfABAGQlfAAAWQkfAEBWwgcAkJXwAQBkJXwAAFkJHwBAVsIHAJCV8AEAZNUy74+jseh56kPRWEweNrChDwGAjLR8AABZCR8AQFbCBwCQlZoPqEM9DED90vIBAGQlfAAAWQkfAEBWwgcAkJXwAQBkJXwAAFkJHwBAVsIHAJCV8AEAZCV8AACNN3x8+eWXccYZZ8Sqq64aSy+9dKy++upx7rnnRnV1dc1z0u0zzzwzunbtWjynf//+8eabb9bHsQMAzT18XHjhhTF8+PC46qqr4rXXXivuX3TRRXHllVfWPCfdv+KKK+Laa6+NZ555Jtq1axcDBgyIOXPm1MfxAwDNeWG5cePGxW677RYDBw4s7vfs2TPuvPPOePbZZ2taPS6//PI4/fTTi+clI0eOjM6dO8eoUaNi3333rY//AwDQXFs+ttxyyxgzZkxMmjSpuP/SSy/FU089FTvuuGNx/913342pU6cWXS0lyy23XPTt2zfGjx+/yO85d+7cqKqqqrUBAM3XYrV8nHrqqUU46NWrVyy11FJFDcj5558f+++/f/F4Ch5JaulYULpfeqyuoUOHxtlnn/3d/wcAQPNt+fjDH/4Qt99+e9xxxx0xceLEuOWWW+Liiy8uvn5XQ4YMiZkzZ9ZslZWV3/l7AQDNrOXjpJNOKlo/SrUbG2ywQbz33ntF68WBBx4YXbp0KfZPmzatGO1Sku5vvPHGi/yebdq0KTYAoDwsVsvH7Nmzo0WL2v8kdb/Mnz+/uJ2G4KYAkupCSlI3TRr10q9fvyV1zABAubR87LLLLkWNR48ePWK99daLF154IS699NI45JBDiscrKipi0KBBcd5558Waa65ZhJE0L0i3bt1i9913r6//AwDQXMNHms8jhYljjjkmpk+fXoSKI488sphUrOTkk0+OWbNmxRFHHBEzZsyIrbfeOkaPHh1t27atj+MHAJpz+Gjfvn0xj0favkpq/TjnnHOKDQCgLmu7AABZCR8AQFbCBwCQlfABAGQlfAAAWQkfAEBWwgcAkJXwAQBkJXwAAFkJHwBAVsIHAJCV8AEAZCV8AABZCR8AQFbCBwCQVcu8Pw5oynqe+lA0FpOHDWzoQwC+Iy0fAEBWwgcAkJXwAQBkJXwAAFkJHwBAVsIHAJCV8AEAZCV8AABZCR8AQFbCBwCQlfABAGQlfAAAWQkfAEBWwgc
AkJXwAQBkJXwAAFkJHwBAVsIHAJCV8AEAZCV8AABZCR8AQFbCBwCQlfABAGQlfAAAWQkfAEBWwgcAkJXwAQBkJXwAAFkJHwBAVsIHAJCV8AEAZCV8AABZCR8AQFbCBwCQlfABAGQlfAAAWbXM++MAmp+epz4UjcnkYQMb+hDga2n5AACyEj4AgKyEDwAgK+EDAMhK+AAAshI+AICshA8AICvhAwDISvgAALISPgCAxj29+vvvvx+nnHJKPPzwwzF79uxYY401YsSIEdGnT5/i8erq6jjrrLPihhtuiBkzZsRWW20Vw4cPjzXXXLM+jh+ARszU83zvlo9PPvmkCBOtWrUqwserr74al1xySaywwgo1z7noooviiiuuiGuvvTaeeeaZaNeuXQwYMCDmzJmzOD8KAGimFqvl48ILL4zu3bsXLR0lq666as3t1Opx+eWXx+mnnx677bZbsW/kyJHRuXPnGDVqVOy7775L8tgBgObe8nH//fcX3St77bVXdOrUKTbZZJOie6Xk3XffjalTp0b//v1r9i233HLRt2/fGD9+/CK/59y5c6OqqqrWBgA0X4sVPt55552a+o1HHnkkjj766Dj++OPjlltuKR5PwSNJLR0LSvdLj9U1dOjQIqCUttSyAgA0X4sVPubPnx+bbrppXHDBBUWrxxFHHBGHH354Ud/xXQ0ZMiRmzpxZs1VWVn7n7wUANLPw0bVr11h33XVr7VtnnXViypQpxe0uXboUX6dNm1brOel+6bG62rRpEx06dKi1AQDN12KFjzTS5Y033qi1b9KkSbHKKqvUFJ+mkDFmzJiax1MNRxr10q9fvyV1zABAuYx2GTx4cGy55ZZFt8vee+8dzz77bFx//fXFllRUVMSgQYPivPPOK+pCUhg544wzolu3brH77rvX1/8BAGiu4WOzzTaLe++9t6jTOOecc4pwkYbW7r///jXPOfnkk2PWrFlFPUiaZGzrrbeO0aNHR9u2bevj+AGA5j7D6c4771xsXyW1fqRgkjYAgLqs7QIAZCV8AABZCR8AQFbCBwCQlfABAGQlfAAAWQkfAEBWwgcAkJXwAQBkJXwAAFkJHwBAVsIHAJCV8AEAZCV8AABZCR8AQFYt8/44ACDpeepD0ZhMHjYw28/S8gEAZCV8AABZCR8AQFbCBwCQlfABAGQlfAAAWQkfAEBWwgcAkJXwAQBkJXwAAFkJHwBAVsIHAJCV8AEAZCV8AABZCR8AQFbCBwAgfAAAzZeWDwAgK+EDAMhK+AAAshI+AICshA8AICvhAwDISvgAALISPgCArIQPACAr4QMAyEr4AACyEj4AgKyEDwAgK+EDAMhK+AAAshI+AICshA8AICvhAwDISvgAALISPgCArIQPACAr4QMAyEr4AACyEj4AgKyEDwAgK+EDAMhK+AAAshI+AICshA8AICvhAwBoOuFj2LBhUVFREYMGDarZN2fOnDj22GNjxRVXjGWXXTb23HPPmDZt2pI4VgCgnMPHc889F9ddd11suOGGtfYPHjw4Hnjggbjnnnvi8ccfjw8++CD22GOPJXGsAEC5ho/PPvss9t9//7jhhhtihRVWqNk/c+bMuPHGG+PSSy+N7bbbLnr37h0jRoyIcePGxdNPP70kjxsAKKfwkbpVBg4cGP3796+1f8KECfHFF1/U2t+rV6/o0aNHjB8/fpHfa+7cuVFVVVVrAwCar5aL+w/uuuuumDhxYtHtUtfUqVOjdevWsfzyy9fa37lz5+KxRRk6dGicffbZi3sYAEA5tHxUVlbGCSecELfffnu0bdt2iRzAkCFDiu6a0pZ+BgDQfC1W+EjdKtOnT49NN900WrZsWWypqPSKK64obqcWjnnz5sWMGTNq/bs02qVLly6L/J5t2rSJDh061NoAgOZrsbpdtt9++3j55Zdr7Tv44IOLuo5TTjklunfvHq1atYoxY8YUQ2yTN954I6ZMmRL9+vVbskcOADT/8NG+fftYf/31a+1
r165dMadHaf+hhx4aJ554YnTs2LFoxTjuuOOK4LHFFlss2SMHAMqj4PSbXHbZZdGiRYui5SONZBkwYEBcc801S/rHAADlGj4ee+yxWvdTIerVV19dbAAAdVnbBQDISvgAALISPgCArIQPACAr4QMAyEr4AACyEj4AgKyEDwAgK+EDAMhK+AAAshI+AICshA8AICvhAwDISvgAALISPgCArIQPACAr4QMAyEr4AACyEj4AgKyEDwAgK+EDAMhK+AAAshI+AICshA8AICvhAwDISvgAALISPgCArIQPACAr4QMAyEr4AACyEj4AgKyEDwAgK+EDAMhK+AAAshI+AICshA8AICvhAwDISvgAALISPgCArIQPACAr4QMAyEr4AACyEj4AgKyEDwAgK+EDAMhK+AAAshI+AICshA8AICvhAwDISvgAALISPgCArIQPACAr4QMAyEr4AACyEj4AgKyEDwAgK+EDAMhK+AAAshI+AICshA8AICvhAwDISvgAALISPgCAxhs+hg4dGptttlm0b98+OnXqFLvvvnu88cYbtZ4zZ86cOPbYY2PFFVeMZZddNvbcc8+YNm3akj5uAKAcwsfjjz9eBIunn346/vKXv8QXX3wRO+ywQ8yaNavmOYMHD44HHngg7rnnnuL5H3zwQeyxxx71cewAQBPUcnGePHr06Fr3b7755qIFZMKECbHNNtvEzJkz48Ybb4w77rgjtttuu+I5I0aMiHXWWacILFtsscWSPXoAoLxqPlLYSDp27Fh8TSEktYb079+/5jm9evWKHj16xPjx4xf5PebOnRtVVVW1NgCg+frO4WP+/PkxaNCg2GqrrWL99dcv9k2dOjVat24dyy+/fK3ndu7cuXjsq+pIlltuuZqte/fu3/WQAIDmHD5S7ccrr7wSd9111/c6gCFDhhQtKKWtsrLye30/AKAZ1XyU/PKXv4wHH3wwnnjiiVh55ZVr9nfp0iXmzZsXM2bMqNX6kUa7pMcWpU2bNsUGAJSHxWr5qK6uLoLHvffeG2PHjo1VV1211uO9e/eOVq1axZgxY2r2paG4U6ZMiX79+i25owYAyqPlI3W1pJEs9913XzHXR6mOI9VqLL300sXXQw89NE488cSiCLVDhw5x3HHHFcHDSBcAYLHDx/Dhw4uv2267ba39aTjtQQcdVNy+7LLLokWLFsXkYmkky4ABA+Kaa65xtgGAxQ8fqdvlm7Rt2zauvvrqYgMAqMvaLgBAVsIHAJCV8AEAZCV8AABZCR8AQFbCBwCQlfABAGQlfAAAWQkfAEBWwgcAkJXwAQBkJXwAAFkJHwBAVsIHAJCV8AEAZCV8AABZCR8AQFbCBwCQlfABAGQlfAAAWQkfAEBWwgcAkJXwAQBkJXwAAFkJHwBAVsIHAJCV8AEAZCV8AABZCR8AQFbCBwCQlfABAGQlfAAAWQkfAEBWwgcAkJXwAQBkJXwAAFkJHwBAVsIHAJCV8AEAZCV8AABZCR8AQFbCBwCQlfABAGQlfAAAWQkfAEBWwgcAkJXwAQBkJXwAAFkJHwBAVsIHAJCV8AEAZCV8AABZCR8AQFbCBwCQlfABAGQlfAAAWQkfAEBWwgcAkJXwAQBkJXwAAFkJHwBAVsIHAJCV8AEANI/wcfXVV0fPnj2jbdu20bdv33j22Wfr60cBAOUePu6+++448cQT46yzzoqJEyfGRhttFAMGDIjp06fXx48DAMo9fFx66aVx+OGHx8EHHxzrrrtuXHvttbHMMsvETTfdVB8/DgBoQlou6W84b968mDBhQgwZMqRmX4sWLaJ///4xfvz4hZ4/d+7cYiuZOXNm8bWqqup7H8v8ubOjsVgS/58lyblxbrxumuffU2N7v3FuyufcVP3/v6+urv7mJ1cvYe+//376qdXjxo2rtf+kk06q3nzzzRd6/llnnVU83+YceA14DXgNeA14DUSTPweVlZXfmBWWeMvH4kotJKk+pGT+/Pnx8ccfx4orrhgVFRX
R0FKS6969e1RWVkaHDh0a+nAaFefGefGa8ffkvaZhVTWiz6jU4vHpp59Gt27dvvG5Szx8/OAHP4illloqpk2bVmt/ut+lS5eFnt+mTZtiW9Dyyy8fjU36pTb0L7axcm6cF68Zf0/eaxpWh0byGbXccss1TMFp69ato3fv3jFmzJharRnpfr9+/Zb0jwMAmph66XZJ3SgHHnhg9OnTJzbffPO4/PLLY9asWcXoFwCgvNVL+Nhnn33iww8/jDPPPDOmTp0aG2+8cYwePTo6d+4cTU3qEkrzldTtGsK58Zrx9+S9xvtwQ2vTRD+jKlLVaUMfBABQPqztAgBkJXwAAFkJHwBAVsIHAJCV8AEAZCV8ADQCc+bMaehDgGwafG2XxujNN9+Mv/71rzF9+vRidtYFpblLysn9998fO+64Y7Rq1aq4/XV23XXXKFerrbZaPPfcc8WaRAuaMWNGbLrppvHOO+802LHReKX3l/PPPz+uvfbaYgmKSZMmFa+lM844I3r27BmHHnpolKOf/OQnX7u219ixY6OcXHHFFd/6uccff3w0Beb5qOOGG26Io48+ulijJq1Fs+AfQLo9ceLEKCctWrQoJorr1KlTcfurpHPz5ZdfRrla8DwtKH2g9OjRI+bOnRvlLi2xkLZFhfqbbropytE555wTt9xyS/H18MMPj1deeaUIH3fffXcxM/T48eOjHA0ePLjW/S+++CJefPHF4vyk2bN/97vfRTlZddVVa91Pk3jOnj27Zh20dJGzzDLLFO8/TeVCR8tHHeedd15xJXLKKac0zG+kkVnwQ6LuBwb/1zJU8sgjj9RaVCmFsfRhm65gy93ZZ59dfMCmJRe6du3aKFasbgxGjhwZ119/fWy//fZx1FFH1ezfaKON4vXXX49yddllly1y///8z//EZ599FuXm3Xffrbl9xx13xDXXXBM33nhjrL322sW+N954owivRx55ZDQZaYZT/qt9+/bVb7/9tlNSx7x586q322676kmTJjk3C6ioqCi2Fi1a1Nwuba1bt65ea621qh944IGyP2ddunSpHjlyZNmfh7ratm1bPXny5OL2sssuW/Pe849//KO6Xbt2zlcdb775ZvUKK6xQ1udltdVWq544ceJC+59//vnqnj17VjcVCk7r2GuvveLRRx9tmCTYiKWaj7///e8NfRiNTmoNSlvqWil1J5S21NWSrkh23nnnKHfz5s2LLbfcsqEPo9FZd91148knn1xo/x//+MfYZJNNGuSYGrPUDdW2bdsoZ//617/iP//5z0L7U0tr6uZtKnS71LHGGmsUxV5PP/10bLDBBsWHblMs5qkPv/jFL4qmvmHDhjX0oTTqZlEWdthhhxXNxelvi9oF7KmG4f333y8C65///OcisKbumAcffLBsT9Uee+xR635agix96D7//PNl/xrafvvti+6V3//+90UxezJhwoSiVrF///7RVCg4/YbCnlonq6KiyRTz1IfjjjuueFNcc801o3fv3tGuXbtaj1966aVRzmbNmhWPP/54TJkypbjSX1A5h9bkhBNOKF47G264YbHVDfXl/NpJLR+pHuall14q6hnSB0oKJTvssEOUq4MPPnihgu6VVloptttuu7I+L6Vi0xRY00rxpb+j1BIyYMCAuPnmmxcqem+shA++VupqWX/99Ys//jT87StfSBUVZTf8bUEvvPBC7LTTTkUFegohHTt2jH//+99NrgK9vnjtwJI1adKkmqLkXr16xVprrdWkTrHwwddaaqmliubO9AH6VXNZELHtttsWf/xpvoY04iVdxaarktRVla766zYjA98stSAuamh2qrEqd/PmzSu6e1dfffVo2bLpVVA0vSOuZ4cccsjXPl5u8xGkceTpBZ7Cx+TJkw23/QppDoLrrruuaCFKgS0Vm6awdtFFFxVNpMLHf/3zn/8svq688spR7tLr5euGHZfr3Dnpqj5NsDZu3LiFaj/KfU6h2bNnF13gaX6YpDQxXdr
3wx/+ME499dRoCoSPOj755JOFJrdJE9ukSVxSf2O52XPPPePHP/5xzdwMaZ6G9OG6KOXctZBaOUqTsKWgluo+1llnnaIVpLKyMspdunJNc+hccsklNfM0tG/fPn71q1/Faaed9rUT2DVn995770LvN6kLL32wpLlRyrnmI13Np6Jb88LUNmTIkKJl9bHHHouf/vSnNftTsWmaB0X4aCZvBqU3zlRJnJq3yk2aACldtb/11ltF0WSayCZ9aFBbGhaZuqRSMW4Ka6lgMNV83HrrrUXNTLlLAaM0UmqrrbYq9j311FPFm2Va0yRN7FeOdtttt4X2/fznP4/11luvmOW0XKdXTy2JaQRHqmWgtlGjRhWvjS222KJWq1l6zbz99tvRZDT0RCNNxeuvv15MlFTODjrooOqqqqqGPoxG6bnnnqseO3ZscXvatGnVAwYMKCas6927d/ULL7xQXe66du1afd999y20f9SoUdXdunVrkGNqzNJkY+U8yVifPn2qn3zyyYY+jEZp6aWXrpmMbsGJ6V588cXqDh06VDcVul2+pZQoFzWxSzkZMWJEQx9Co5WuOlJ/dKnbJRWepla0NInUxhtvHOXu448/XuRVbNqXHuO/Pv/882IhsdR/X06qqqpqbl944YVx8sknxwUXXLDI+ZY6dOgQ5apPnz7x0EMPFTUeSan1I8370a9fv2gqhI86TjzxxEVObpN+2alwEL6q+Tx1T6X1OVJ9UGoSTW+YqeslzWGRuu3KWVqr5Kqrrlpodc60Lz1WrlZYYYVaTefp/ebTTz8thmjfdtttUW7F7XXPRZpQa0EKTqMIZGml8VdffbW4IE6L7KXbqTg3zTPUVBhq+w3zESw4uU0aCdMUhzRR/9IqyOkPP7WApCuQK6+8sigc/NOf/lTUf7z22mtl/WtI52bgwIHFEMnS1VmaKjsV5j788MPxox/9KMpRKixNIzdKRdyl95u+ffsWIaSchpQu+MGZRtZ17959oeL2VH+XXjPlfiH4zjvvxNChQ2tNTJcWQ02tRE2F8LGIYUwpXZdm70x/BKnAJ41cSDPIwaKkK9U04U/6sNh7772LEHLWWWcVI13SypPpdVXu0hTiw4cPrwli6W/qmGOOiW7dukW5WnAenQV99NFHxb5yHVLqvHy1Aw44oLhI3mabbZr0IIjyHN/2NXbfffdihEJSaj5PwwPT/vTGCV+1JlAKqSlsPPLIIzVTQKcJksq5f3pBaXK6XXfdtRhGma5cN99882Ktjvvvvz/KVakboa50NVvOC6g5L1+tdevWRatHmtQwtQ6liQxTa+ubb74ZTYmWjzo0n/NdpFVI99tvv+JKNfVTl1ZGTm8STzzxRNG1UM7SOhTpii1d0ZcKc0vKcdKoUm1Z6q9Pw9dTy1lJOhfPPPNMcfX/t7/9LcqJ87J4LYnpvSV1V6UtTTaW5kQpTeLX2ClgqCM1j5fmsUgfIKmIMPXDphaQ9957ryF+RzQBaW6GrbfeumhCX7CAMgWRn/3sZ1HuUmX+XnvtVdS/dO7cOcpdqgdKUhB7+eWXi6vZknQ7vYZ+/etfR7lxXhavWDm1JqavqVg31SOmeqGmQstHHWnFzbT8d/rASJNDpSu2VCCXJrxJBXNTp05tmN8UNGGp6yl9sDTlPur6kLqgUuuHrjnn5dv6zW9+U8xumv6eUt1UmtQwrS2VakBSEGkqhI86NJ/DkpdGiqWZTct1xk5YUlr8/4iowYMHFy3zTW012xLhYxFS60ap+by05sSzzz5bXJ2Y7he+W3dm6nZJb5qLmjQqTd0PfLM0vDbVeKTWjyeffLLopiu1fpRW124KhA+g3qV1XdIEbGkER+qnXnCER7pdzosSwvcNI5dddlncfvvtxTwoTaV4W/gA6l2XLl2K1o204ma5rmALS0J1dXVR75FaPtKWFmhMU9OnesXUApKCSFMgfAD1rmPHjsWqvwpO4ftJRaVpHphUFlDqbkkzBKcRL02J8AHUu1Qcl+o
9UqU+8N2ldcZS2GjqI6SED6DepS6XkSNHFldrqXm4bsFpWnwPKB/CB5B9wcZab0IVFTF27Fi/BSgjwgcAkJWycwAgK+EDAMhK+AAAshI+AICshA+gXqTJjwYNGuTsAgsRPgCArIQPACAr4QP43mbNmhUHHHBALLvsstG1a9e45JJLaj1+6623Rp8+faJ9+/bFInP77bdfTJ8+vWahrDXWWCMuvvjiWv/mxRdfLCYge+utt/yGoJkRPoDv7aSTTorHH3887rvvvnj00UeL1TYnTpxY8/gXX3wR5557brH896hRo2Ly5Mlx0EEHFY+lgHHIIYfEiBEjan3PdH+bbbYpggnQvJjhFPhe0gqbK664Ytx2222x1157Ffs+/vjjWHnlleOII46Iyy+/fKF/8/zzz8dmm20Wn376adFa8sEHH0SPHj1i3LhxsfnmmxdhpVu3bkVryIEHHug3BM2Mlg/ge3n77bdj3rx50bdv35p9HTt2jLXXXrvm/oQJE2KXXXYpAkbqeklLgSdTpkwpvqagMXDgwLjpppuK+w888EDMnTu3JswAzYvwAdR7PciAAQOKJcBvv/32eO655+Lee+8tHkuhpeSwww6Lu+66Kz7//POiy2WfffaJZZZZxm8HmiHhA/heVl999WjVqlU888wzNfs++eSTmDRpUnH79ddfj48++iiGDRsWP/rRj6JXr141xaYL2mmnnaJdu3YxfPjwGD16dFEHAjRPLRv6AICmLdVsHHrooUXRaar96NSpU5x22mnRosX/XdukrpbWrVvHlVdeGUcddVS88sorRfFpXUsttVRRhDpkyJBYc801o1+/fg3wvwFy0PIBfG+//e1vi1aNVNfRv3//2HrrraN3797FYyuttFLcfPPNcc8998S6665btIDUHVZbkkJM6oo5+OCD/VagGTPaBWg0nnzyydh+++2jsrIyOnfu3NCHA9QT4QNocGlky4cfflgMq02TkKXCVKD50u0CNLg777wzVllllZgxY0ZcdNFFDX04QD3T8gEAZKXlAwDISvgAALISPgCArIQPACAr4QMAyEr4AACyEj4AgKyEDwAgcvpf0a6UiZvwAS8AAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# one hot encorder\n", + "X['day'].value_counts().plot(kind='bar')" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 fri\n", + "1 tue\n", + "2 sat\n", + "3 fri\n", + "4 sun\n", + " ... \n", + "512 sun\n", + "513 sun\n", + "514 sun\n", + "515 sat\n", + "516 tue\n", + "Name: day, Length: 517, dtype: object" + ] + }, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X['day']" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
OneHotEncoder(drop='if_binary')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "OneHotEncoder(drop='if_binary')" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.preprocessing import OneHotEncoder\n", + "onehot = OneHotEncoder(drop='if_binary')\n", + "onehot.fit(X[['day']])" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1., 0., 0., ..., 0., 0., 0.],\n", + " [0., 0., 0., ..., 0., 1., 0.],\n", + " [0., 0., 1., ..., 0., 0., 0.],\n", + " ...,\n", + " [0., 0., 0., ..., 0., 0., 0.],\n", + " [0., 0., 1., ..., 0., 0., 0.],\n", + " [0., 0., 0., ..., 0., 1., 0.]], shape=(517, 7))" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "listening_enc = onehot.transform(X[['day']])\n", + "listening_enc.toarray()" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# one hot encoding for day\n", + "X['day'].value_counts" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
OneHotEncoder(drop='if_binary')
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "OneHotEncoder(drop='if_binary')" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.preprocessing import OneHotEncoder\n", + "onehot = OneHotEncoder(drop='if_binary')\n", + "onehot.fit(X[['day']])" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[1., 0., 0., ..., 0., 0., 0.],\n", + " [0., 0., 0., ..., 0., 1., 0.],\n", + " [0., 0., 1., ..., 0., 0., 0.],\n", + " ...,\n", + " [0., 0., 0., ..., 0., 0., 0.],\n", + " [0., 0., 1., ..., 0., 0., 0.],\n", + " [0., 0., 0., ..., 0., 1., 0.]], shape=(517, 7))" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "listing_enc = onehot.transform(X[['day']])\n", + "listing_enc.toarray()" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Pipeline(steps=[('preprocess',\n",
+       "                 ColumnTransformer(remainder='passthrough',\n",
+       "                                   transformers=[('numeric_simple',\n",
+       "                                                  Pipeline(steps=[('imputer',\n",
+       "                                                                   SimpleImputer(strategy='median')),\n",
+       "                                                                  ('standardizer',\n",
+       "                                                                   StandardScaler())]),\n",
+       "                                                  Index(['coord_x', 'coord_y', 'ffmc', 'dmc', 'dc', 'isi', 'temp', 'rh', 'wind',\n",
+       "       'rain'],\n",
+       "      dtype='object'))])),\n",
+       "                ('model', Lasso())])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "Pipeline(steps=[('preprocess',\n", + " ColumnTransformer(remainder='passthrough',\n", + " transformers=[('numeric_simple',\n", + " Pipeline(steps=[('imputer',\n", + " SimpleImputer(strategy='median')),\n", + " ('standardizer',\n", + " StandardScaler())]),\n", + " Index(['coord_x', 'coord_y', 'ffmc', 'dmc', 'dc', 'isi', 'temp', 'rh', 'wind',\n", + " 'rain'],\n", + " dtype='object'))])),\n", + " ('model', Lasso())])" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.pipeline import Pipeline\n", + "from sklearn.compose import ColumnTransformer\n", + "from sklearn.preprocessing import StandardScaler, PowerTransformer\n", + "from sklearn.impute import SimpleImputer, KNNImputer\n", + "from sklearn.model_selection import train_test_split, cross_validate\n", + "from sklearn.linear_model import Lasso\n", + "\n", + "num_cols = numeric_features \n", + "\n", + "pipe_num_simple = Pipeline([\n", + " ('imputer', SimpleImputer(strategy = 'median')),\n", + " ('standardizer', StandardScaler())\n", + "])\n", + "\n", + "ctransform_simple= ColumnTransformer([\n", + " ('numeric_simple', pipe_num_simple, num_cols),\n", + "], remainder='passthrough')\n", + "\n", + "pipe_simple = Pipeline([\n", + " ('preprocess', ctransform_simple),\n", + " ('model', Lasso())\n", + "])\n", + "pipe_simple" + ] }, { "cell_type": "markdown", @@ -197,12 +4852,1576 @@ "+ Categorical: one-hot encoding." ] }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['coord_x', 'coord_y', 'ffmc', 'dmc', 'dc', 'isi', 'temp', 'rh', 'wind',\n", + " 'rain'],\n", + " dtype='object')" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "numeric_features" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Pipeline(steps=[('preprocess',\n",
+       "                 ColumnTransformer(remainder='passthrough',\n",
+       "                                   transformers=[('numeric_std',\n",
+       "                                                  Pipeline(steps=[('imputer',\n",
+       "                                                                   SimpleImputer(strategy='median')),\n",
+       "                                                                  ('standardizer',\n",
+       "                                                                   StandardScaler())]),\n",
+       "                                                  Index(['coord_x', 'coord_y', 'ffmc', 'dmc', 'dc', 'isi', 'temp', 'rh', 'wind',\n",
+       "       'rain'],\n",
+       "      dtype='object')),\n",
+       "                                                 ('numeric_yj',\n",
+       "                                                  Pipeline(steps=[('imputer',\n",
+       "                                                                   SimpleImputer(strategy='median')),\n",
+       "                                                                  ('standardizer',\n",
+       "                                                                   StandardScaler()),\n",
+       "                                                                  ('transform',\n",
+       "                                                                   PowerTransformer())]),\n",
+       "                                                  ['temp', 'wind', 'ffmc'])])),\n",
+       "                ('clf', Lasso())])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "Pipeline(steps=[('preprocess',\n", + " ColumnTransformer(remainder='passthrough',\n", + " transformers=[('numeric_std',\n", + " Pipeline(steps=[('imputer',\n", + " SimpleImputer(strategy='median')),\n", + " ('standardizer',\n", + " StandardScaler())]),\n", + " Index(['coord_x', 'coord_y', 'ffmc', 'dmc', 'dc', 'isi', 'temp', 'rh', 'wind',\n", + " 'rain'],\n", + " dtype='object')),\n", + " ('numeric_yj',\n", + " Pipeline(steps=[('imputer',\n", + " SimpleImputer(strategy='median')),\n", + " ('standardizer',\n", + " StandardScaler()),\n", + " ('transform',\n", + " PowerTransformer())]),\n", + " ['temp', 'wind', 'ffmc'])])),\n", + " ('clf', Lasso())])" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "num_cols = numeric_features\n", + "\n", + "num_cols_transform = ['temp', 'wind', 'ffmc']\n", + "\n", + "pipe_num_simple = Pipeline([\n", + " ('imputer', SimpleImputer(strategy = 'median')),\n", + " ('standardizer', StandardScaler())\n", + "])\n", + "\n", + "pipe_num_yj = Pipeline([\n", + " ('imputer', SimpleImputer(strategy = 'median')),\n", + " ('standardizer', StandardScaler()),\n", + " ('transform', PowerTransformer(method='yeo-johnson'))\n", + "])\n", + "\n", + "ctransform_yj= ColumnTransformer([\n", + " ('numeric_std', pipe_num_simple, num_cols),\n", + " ('numeric_yj', pipe_num_yj, num_cols_transform),\n", + "], remainder='passthrough')\n", + "\n", + "pipe_yj = Pipeline([\n", + " ('preprocess', ctransform_yj),\n", + " ('clf', Lasso())\n", + "])\n", + "pipe_yj" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "from sklearn.pipeline import Pipeline\n", + "from sklearn.compose import ColumnTransformer\n", + "from sklearn.preprocessing import StandardScaler, PowerTransformer\n", + "from sklearn.impute import SimpleImputer, KNNImputer\n", + "from sklearn.model_selection import train_test_split, cross_validate\n", + 
"from sklearn.linear_model import Lasso\n", + "\n", + "num_cols = numeric_features \n", + "\n", + "pipe_num_simple = Pipeline([\n", + " ('imputer', SimpleImputer(strategy = 'median')),\n", + " ('standardizer', StandardScaler())\n", + "])\n", + "\n", + "ctransform_simple= ColumnTransformer([\n", + " ('numeric_simple', pipe_num_simple, num_cols),\n", + "], remainder='passthrough')\n", + "\n", + "pipe_simple = Pipeline([\n", + " ('preprocess', ctransform_simple),\n", + " ('model', ())\n", + "])\n", + "pipe_simple" + ] }, { "cell_type": "markdown", @@ -423,7 +6642,7 @@ ], "metadata": { "kernelspec": { - "display_name": "env", + "display_name": "production-env (3.11.14)", "language": "python", "name": "python3" }, @@ -437,7 +6656,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.3" + "version": "3.11.14" } }, "nbformat": 4, diff --git a/05_src/data/fires/forestfires.names b/05_src/data/fires/forestfires.names new file mode 100644 index 000000000..a4a2fcfb4 --- /dev/null +++ b/05_src/data/fires/forestfires.names @@ -0,0 +1,66 @@ +Citation Request: + This dataset is public available for research. The details are described in [Cortez and Morais, 2007]. + Please include this citation if you plan to use this database: + + P. Cortez and A. Morais. A Data Mining Approach to Predict Forest Fires using Meteorological Data. + In J. Neves, M. F. Santos and J. Machado Eds., New Trends in Artificial Intelligence, + Proceedings of the 13th EPIA 2007 - Portuguese Conference on Artificial Intelligence, December, + Guimaraes, Portugal, pp. 512-523, 2007. APPIA, ISBN-13 978-989-95618-0-9. + Available at: http://www.dsi.uminho.pt/~pcortez/fires.pdf + +1. Title: Forest Fires + +2. Sources + Created by: Paulo Cortez and Aníbal Morais (Univ. Minho) @ 2007 + +3. Past Usage: + + P. Cortez and A. Morais. A Data Mining Approach to Predict Forest Fires using Meteorological Data. 
+ In Proceedings of the 13th EPIA 2007 - Portuguese Conference on Artificial Intelligence, + December, 2007. (http://www.dsi.uminho.pt/~pcortez/fires.pdf) + + In the above reference, the output "area" was first transformed with a ln(x+1) function. + Then, several Data Mining methods were applied. After fitting the models, the outputs were + post-processed with the inverse of the ln(x+1) transform. Four different input setups were + used. The experiments were conducted using 10-fold cross-validation x 30 runs. Two + regression metrics were measured: MAD and RMSE. A Gaussian support vector machine (SVM) fed + with only 4 direct weather conditions (temp, RH, wind and rain) obtained the best MAD value: + 12.71 +- 0.01 (mean and confidence interval within 95% using a Student's t-distribution). The + best RMSE was attained by the naive mean predictor. An analysis of the regression error curve + (REC) shows that the SVM model predicts more examples within a lower admitted error. In effect, + the SVM model is better at predicting small fires, which are the majority. + +4. Relevant Information: + + This is a very difficult regression task. It can be used to test regression methods. Also, + it could be used to test outlier detection methods, since it is not clear how many outliers + there are. Yet, the number of examples of fires with a large burned area is very small. + +5. Number of Instances: 517 + +6. Number of Attributes: 12 + output attribute + + Note: several of the attributes may be correlated, thus it makes sense to apply some sort of + feature selection. + +7. Attribute information: + + For more information, read [Cortez and Morais, 2007]. + + 1. X - x-axis spatial coordinate within the Montesinho park map: 1 to 9 + 2. Y - y-axis spatial coordinate within the Montesinho park map: 2 to 9 + 3. month - month of the year: "jan" to "dec" + 4. day - day of the week: "mon" to "sun" + 5. FFMC - FFMC index from the FWI system: 18.7 to 96.20 + 6.
DMC - DMC index from the FWI system: 1.1 to 291.3 + 7. DC - DC index from the FWI system: 7.9 to 860.6 + 8. ISI - ISI index from the FWI system: 0.0 to 56.10 + 9. temp - temperature in Celsius degrees: 2.2 to 33.30 + 10. RH - relative humidity in %: 15.0 to 100 + 11. wind - wind speed in km/h: 0.40 to 9.40 + 12. rain - outside rain in mm/m2 : 0.0 to 6.4 + 13. area - the burned area of the forest (in ha): 0.00 to 1090.84 + (this output variable is very skewed towards 0.0, thus it may make + sense to model with the logarithm transform). + +8. Missing Attribute Values: None