From 844ba7e017f1400008b4b3bbbd1f14257c9967dd Mon Sep 17 00:00:00 2001 From: Aunima Date: Sun, 19 Apr 2026 22:29:21 -0400 Subject: [PATCH 1/2] Complete assignment 1 --- 02_activities/assignments/assignment_1.ipynb | 1445 +++++++++++++++++- 1 file changed, 1410 insertions(+), 35 deletions(-) diff --git a/02_activities/assignments/assignment_1.ipynb b/02_activities/assignments/assignment_1.ipynb index 1d25bbcb3..a2d793d1d 100644 --- a/02_activities/assignments/assignment_1.ipynb +++ b/02_activities/assignments/assignment_1.ipynb @@ -34,7 +34,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "4a3485d6-ba58-4660-a983-5680821c5719", "metadata": {}, "outputs": [], @@ -56,10 +56,288 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "a431d282-f9ca-4d5d-8912-71ffc9d8ea19", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
alcoholmalic_acidashalcalinity_of_ashmagnesiumtotal_phenolsflavanoidsnonflavanoid_phenolsproanthocyaninscolor_intensityhueod280/od315_of_diluted_winesprolineclass
014.231.712.4315.6127.02.803.060.282.295.641.043.921065.00
113.201.782.1411.2100.02.652.760.261.284.381.053.401050.00
213.162.362.6718.6101.02.803.240.302.815.681.033.171185.00
314.371.952.5016.8113.03.853.490.242.187.800.863.451480.00
413.242.592.8721.0118.02.802.690.391.824.321.042.93735.00
.............................................
17313.715.652.4520.595.01.680.610.521.067.700.641.74740.02
17413.403.912.4823.0102.01.800.750.431.417.300.701.56750.02
17513.274.282.2620.0120.01.590.690.431.3510.200.591.56835.02
17613.172.592.3720.0120.01.650.680.531.469.300.601.62840.02
17714.134.102.7424.596.02.050.760.561.359.200.611.60560.02
\n", + "

178 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " alcohol malic_acid ash alcalinity_of_ash magnesium total_phenols \\\n", + "0 14.23 1.71 2.43 15.6 127.0 2.80 \n", + "1 13.20 1.78 2.14 11.2 100.0 2.65 \n", + "2 13.16 2.36 2.67 18.6 101.0 2.80 \n", + "3 14.37 1.95 2.50 16.8 113.0 3.85 \n", + "4 13.24 2.59 2.87 21.0 118.0 2.80 \n", + ".. ... ... ... ... ... ... \n", + "173 13.71 5.65 2.45 20.5 95.0 1.68 \n", + "174 13.40 3.91 2.48 23.0 102.0 1.80 \n", + "175 13.27 4.28 2.26 20.0 120.0 1.59 \n", + "176 13.17 2.59 2.37 20.0 120.0 1.65 \n", + "177 14.13 4.10 2.74 24.5 96.0 2.05 \n", + "\n", + " flavanoids nonflavanoid_phenols proanthocyanins color_intensity hue \\\n", + "0 3.06 0.28 2.29 5.64 1.04 \n", + "1 2.76 0.26 1.28 4.38 1.05 \n", + "2 3.24 0.30 2.81 5.68 1.03 \n", + "3 3.49 0.24 2.18 7.80 0.86 \n", + "4 2.69 0.39 1.82 4.32 1.04 \n", + ".. ... ... ... ... ... \n", + "173 0.61 0.52 1.06 7.70 0.64 \n", + "174 0.75 0.43 1.41 7.30 0.70 \n", + "175 0.69 0.43 1.35 10.20 0.59 \n", + "176 0.68 0.53 1.46 9.30 0.60 \n", + "177 0.76 0.56 1.35 9.20 0.61 \n", + "\n", + " od280/od315_of_diluted_wines proline class \n", + "0 3.92 1065.0 0 \n", + "1 3.40 1050.0 0 \n", + "2 3.17 1185.0 0 \n", + "3 3.45 1480.0 0 \n", + "4 2.93 735.0 0 \n", + ".. ... ... ... \n", + "173 1.74 740.0 2 \n", + "174 1.56 750.0 2 \n", + "175 1.56 835.0 2 \n", + "176 1.62 840.0 2 \n", + "177 1.60 560.0 2 \n", + "\n", + "[178 rows x 14 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from sklearn.datasets import load_wine\n", "\n", @@ -91,12 +369,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "56916892", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "178\n" + ] + } + ], "source": [ - "# Your answer here" + "# Your answer here\n", + "print(wine_df.shape[0])" ] }, { @@ -109,12 +396,42 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "df0ef103", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 178 entries, 0 to 177\n", + "Data columns (total 14 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 alcohol 178 non-null float64\n", + " 1 malic_acid 178 non-null float64\n", + " 2 ash 178 non-null float64\n", + " 3 alcalinity_of_ash 178 non-null float64\n", + " 4 magnesium 178 non-null float64\n", + " 5 total_phenols 178 non-null float64\n", + " 6 flavanoids 178 non-null float64\n", + " 7 nonflavanoid_phenols 178 non-null float64\n", + " 8 proanthocyanins 178 non-null float64\n", + " 9 color_intensity 178 non-null float64\n", + " 10 hue 178 non-null float64\n", + " 11 od280/od315_of_diluted_wines 178 non-null float64\n", + " 12 proline 178 non-null float64\n", + " 13 class 178 non-null int64 \n", + "dtypes: float64(13), int64(1)\n", + "memory usage: 19.6 KB\n", + "None\n" + ] + } + ], "source": [ - "# Your answer here" + "# Your answer here\n", + "print(wine_df.info())" ] }, { @@ -127,12 +444,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "47989426", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "int64\n", + "[0 1 2]\n" + ] + } + ], "source": [ - "# Your answer here" + "# Your answer here\n", + "print(wine_df['class'].dtype)\n", + "print(wine_df[\"class\"].unique())" ] }, { @@ -146,12 +474,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "bd7b0910", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "13\n" + ] + } + ], "source": [ - "# Your answer here" + "# Your answer here\n", + "print(wine_df.shape[1]-1)" ] }, { @@ -175,10 +512,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "cc899b59", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " alcohol malic_acid ash alcalinity_of_ash magnesium \\\n", + "0 1.518613 -0.562250 0.232053 -1.169593 1.913905 \n", + "1 0.246290 -0.499413 -0.827996 -2.490847 0.018145 \n", + "2 0.196879 0.021231 1.109334 -0.268738 0.088358 \n", + "3 1.691550 -0.346811 0.487926 -0.809251 0.930918 \n", + "4 0.295700 0.227694 1.840403 0.451946 1.281985 \n", + "\n", + " total_phenols flavanoids nonflavanoid_phenols proanthocyanins \\\n", + "0 0.808997 1.034819 -0.659563 1.224884 \n", + "1 0.568648 0.733629 -0.820719 -0.544721 \n", + "2 0.808997 1.215533 -0.498407 2.135968 \n", + "3 2.491446 1.466525 -0.981875 1.032155 \n", + "4 0.808997 0.663351 0.226796 0.401404 \n", + "\n", + " color_intensity hue od280/od315_of_diluted_wines proline \n", + "0 0.251717 0.362177 1.847920 1.013009 \n", + "1 -0.293321 0.406051 1.113449 0.965242 \n", + "2 0.269020 0.318304 0.788587 1.395148 \n", + "3 1.186068 -0.427544 1.184071 2.334574 \n", + "4 -0.319276 0.362177 0.449601 -0.037874 \n" + ] + } + ], "source": [ "# Select predictors (excluding the last column)\n", "predictors = wine_df.iloc[:, :-1]\n", @@ -204,7 +568,7 @@ "id": "403ef0bb", "metadata": {}, "source": [ - "> Your answer here..." + "> It is important for KNN because it is based on distance. Without standardization, it could incorrectly identify nearest neighbours. Therefore, standardization helps to ensure each feature is equal to the distance between wine samples" ] }, { @@ -220,7 +584,7 @@ "id": "fdee5a15", "metadata": {}, "source": [ - "> Your answer here..." + "> The variable \"class\" is categorical, therefore not quantities with meaningful magnitude" ] }, { @@ -236,7 +600,7 @@ "id": "f0676c21", "metadata": {}, "source": [ - "> Your answer here..." + "> It is to ensure that splitting of the data (training and testing) each time we run the code. No, the seed value does not matter because the value does not impact the model, but rather it is important to fix it to one value" ] }, { @@ -251,7 +615,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "id": "72c101f2", "metadata": {}, "outputs": [], @@ -261,7 +625,10 @@ "\n", "# split the data into a training and testing set. hint: use train_test_split !\n", "\n", - "# Your code here ..." + "# Your code here ...\n", + "x_train, x_test, y_train, y_test = train_test_split(\n", + " predictors_standardized, wine_df[\"class\"], train_size = 0.75, shuffle = True, stratify= wine_df[\"class\"], random_state = 123\n", + ")" ] }, { @@ -284,12 +651,1011 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 45, "id": "08818c64", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
GridSearchCV(cv=10, estimator=KNeighborsClassifier(),\n",
+       "             param_grid={'n_neighbors': range(1, 51)})
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "GridSearchCV(cv=10, estimator=KNeighborsClassifier(),\n", + " param_grid={'n_neighbors': range(1, 51)})" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here..." + "# Your code here...\n", + "knn = KNeighborsClassifier()\n", + "parameter_grid = {\n", + " \"n_neighbors\" : range(1, 51)\n", + "}\n", + "wine_tune_grid = GridSearchCV(\n", + " estimator = knn,\n", + " param_grid = parameter_grid,\n", + " cv = 10 \n", + ")\n", + "wine_tune_grid.fit(x_train, y_train)" ] }, { @@ -305,12 +1671,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 46, "id": "ffefa9f2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Final Test Accuracy: 93.33%\n" + ] + } + ], "source": [ - "# Your code here..." + "# Your code here...\n", + "best_n_neighbors = wine_tune_grid.best_params_['n_neighbors']\n", + "knn_best = KNeighborsClassifier(n_neighbors = best_n_neighbors)\n", + "knn.fit(x_train, y_train)\n", + "y_pred = knn.predict(x_test)\n", + "final_accuracy = accuracy_score(y_test, y_pred)\n", + "print(f\"Final Test Accuracy: {final_accuracy:.2%}\")" ] }, { @@ -365,7 +1745,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.10.4", + "display_name": "lcr-env", "language": "python", "name": "python3" }, @@ -379,12 +1759,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.19" - }, - "vscode": { - "interpreter": { - "hash": "497a84dc8fec8cf8d24e7e87b6d954c9a18a327edc66feb9b9ea7e9e72cc5c7e" - } + "version": "3.11.15" } }, "nbformat": 4, From 74822aadf5b1c4f77b327c9116522e781fcd2bd1 Mon Sep 17 00:00:00 2001 From: Aunima Date: Mon, 20 Apr 2026 09:39:40 -0400 Subject: [PATCH 2/2] assignment-1 --- 02_activities/assignments/assignment_1.ipynb | 1445 +++++++++++++++++- 1 file changed, 1410 insertions(+), 35 deletions(-) diff --git a/02_activities/assignments/assignment_1.ipynb b/02_activities/assignments/assignment_1.ipynb index 1d25bbcb3..a2d793d1d 100644 --- a/02_activities/assignments/assignment_1.ipynb +++ b/02_activities/assignments/assignment_1.ipynb @@ -34,7 +34,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "4a3485d6-ba58-4660-a983-5680821c5719", "metadata": {}, "outputs": [], @@ -56,10 +56,288 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "a431d282-f9ca-4d5d-8912-71ffc9d8ea19", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
alcoholmalic_acidashalcalinity_of_ashmagnesiumtotal_phenolsflavanoidsnonflavanoid_phenolsproanthocyaninscolor_intensityhueod280/od315_of_diluted_winesprolineclass
014.231.712.4315.6127.02.803.060.282.295.641.043.921065.00
113.201.782.1411.2100.02.652.760.261.284.381.053.401050.00
213.162.362.6718.6101.02.803.240.302.815.681.033.171185.00
314.371.952.5016.8113.03.853.490.242.187.800.863.451480.00
413.242.592.8721.0118.02.802.690.391.824.321.042.93735.00
.............................................
17313.715.652.4520.595.01.680.610.521.067.700.641.74740.02
17413.403.912.4823.0102.01.800.750.431.417.300.701.56750.02
17513.274.282.2620.0120.01.590.690.431.3510.200.591.56835.02
17613.172.592.3720.0120.01.650.680.531.469.300.601.62840.02
17714.134.102.7424.596.02.050.760.561.359.200.611.60560.02
\n", + "

178 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " alcohol malic_acid ash alcalinity_of_ash magnesium total_phenols \\\n", + "0 14.23 1.71 2.43 15.6 127.0 2.80 \n", + "1 13.20 1.78 2.14 11.2 100.0 2.65 \n", + "2 13.16 2.36 2.67 18.6 101.0 2.80 \n", + "3 14.37 1.95 2.50 16.8 113.0 3.85 \n", + "4 13.24 2.59 2.87 21.0 118.0 2.80 \n", + ".. ... ... ... ... ... ... \n", + "173 13.71 5.65 2.45 20.5 95.0 1.68 \n", + "174 13.40 3.91 2.48 23.0 102.0 1.80 \n", + "175 13.27 4.28 2.26 20.0 120.0 1.59 \n", + "176 13.17 2.59 2.37 20.0 120.0 1.65 \n", + "177 14.13 4.10 2.74 24.5 96.0 2.05 \n", + "\n", + " flavanoids nonflavanoid_phenols proanthocyanins color_intensity hue \\\n", + "0 3.06 0.28 2.29 5.64 1.04 \n", + "1 2.76 0.26 1.28 4.38 1.05 \n", + "2 3.24 0.30 2.81 5.68 1.03 \n", + "3 3.49 0.24 2.18 7.80 0.86 \n", + "4 2.69 0.39 1.82 4.32 1.04 \n", + ".. ... ... ... ... ... \n", + "173 0.61 0.52 1.06 7.70 0.64 \n", + "174 0.75 0.43 1.41 7.30 0.70 \n", + "175 0.69 0.43 1.35 10.20 0.59 \n", + "176 0.68 0.53 1.46 9.30 0.60 \n", + "177 0.76 0.56 1.35 9.20 0.61 \n", + "\n", + " od280/od315_of_diluted_wines proline class \n", + "0 3.92 1065.0 0 \n", + "1 3.40 1050.0 0 \n", + "2 3.17 1185.0 0 \n", + "3 3.45 1480.0 0 \n", + "4 2.93 735.0 0 \n", + ".. ... ... ... \n", + "173 1.74 740.0 2 \n", + "174 1.56 750.0 2 \n", + "175 1.56 835.0 2 \n", + "176 1.62 840.0 2 \n", + "177 1.60 560.0 2 \n", + "\n", + "[178 rows x 14 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from sklearn.datasets import load_wine\n", "\n", @@ -91,12 +369,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "56916892", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "178\n" + ] + } + ], "source": [ - "# Your answer here" + "# Your answer here\n", + "print(wine_df.shape[0])" ] }, { @@ -109,12 +396,42 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "df0ef103", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 178 entries, 0 to 177\n", + "Data columns (total 14 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 alcohol 178 non-null float64\n", + " 1 malic_acid 178 non-null float64\n", + " 2 ash 178 non-null float64\n", + " 3 alcalinity_of_ash 178 non-null float64\n", + " 4 magnesium 178 non-null float64\n", + " 5 total_phenols 178 non-null float64\n", + " 6 flavanoids 178 non-null float64\n", + " 7 nonflavanoid_phenols 178 non-null float64\n", + " 8 proanthocyanins 178 non-null float64\n", + " 9 color_intensity 178 non-null float64\n", + " 10 hue 178 non-null float64\n", + " 11 od280/od315_of_diluted_wines 178 non-null float64\n", + " 12 proline 178 non-null float64\n", + " 13 class 178 non-null int64 \n", + "dtypes: float64(13), int64(1)\n", + "memory usage: 19.6 KB\n", + "None\n" + ] + } + ], "source": [ - "# Your answer here" + "# Your answer here\n", + "print(wine_df.info())" ] }, { @@ -127,12 +444,23 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "47989426", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "int64\n", + "[0 1 2]\n" + ] + } + ], "source": [ - "# Your answer here" + "# Your answer here\n", + "print(wine_df['class'].dtype)\n", + "print(wine_df[\"class\"].unique())" ] }, { @@ -146,12 +474,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "bd7b0910", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "13\n" + ] + } + ], "source": [ - "# Your answer here" + "# Your answer here\n", + "print(wine_df.shape[1]-1)" ] }, { @@ -175,10 +512,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "cc899b59", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " alcohol malic_acid ash alcalinity_of_ash magnesium \\\n", + "0 1.518613 -0.562250 0.232053 -1.169593 1.913905 \n", + "1 0.246290 -0.499413 -0.827996 -2.490847 0.018145 \n", + "2 0.196879 0.021231 1.109334 -0.268738 0.088358 \n", + "3 1.691550 -0.346811 0.487926 -0.809251 0.930918 \n", + "4 0.295700 0.227694 1.840403 0.451946 1.281985 \n", + "\n", + " total_phenols flavanoids nonflavanoid_phenols proanthocyanins \\\n", + "0 0.808997 1.034819 -0.659563 1.224884 \n", + "1 0.568648 0.733629 -0.820719 -0.544721 \n", + "2 0.808997 1.215533 -0.498407 2.135968 \n", + "3 2.491446 1.466525 -0.981875 1.032155 \n", + "4 0.808997 0.663351 0.226796 0.401404 \n", + "\n", + " color_intensity hue od280/od315_of_diluted_wines proline \n", + "0 0.251717 0.362177 1.847920 1.013009 \n", + "1 -0.293321 0.406051 1.113449 0.965242 \n", + "2 0.269020 0.318304 0.788587 1.395148 \n", + "3 1.186068 -0.427544 1.184071 2.334574 \n", + "4 -0.319276 0.362177 0.449601 -0.037874 \n" + ] + } + ], "source": [ "# Select predictors (excluding the last column)\n", "predictors = wine_df.iloc[:, :-1]\n", @@ -204,7 +568,7 @@ "id": "403ef0bb", "metadata": {}, "source": [ - "> Your answer here..." + "> It is important for KNN because it is based on distance. Without standardization, it could incorrectly identify nearest neighbours. Therefore, standardization helps to ensure each feature is equal to the distance between wine samples" ] }, { @@ -220,7 +584,7 @@ "id": "fdee5a15", "metadata": {}, "source": [ - "> Your answer here..." + "> The variable \"class\" is categorical, therefore not quantities with meaningful magnitude" ] }, { @@ -236,7 +600,7 @@ "id": "f0676c21", "metadata": {}, "source": [ - "> Your answer here..." + "> It is to ensure that splitting of the data (training and testing) each time we run the code. No, the seed value does not matter because the value does not impact the model, but rather it is important to fix it to one value" ] }, { @@ -251,7 +615,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "id": "72c101f2", "metadata": {}, "outputs": [], @@ -261,7 +625,10 @@ "\n", "# split the data into a training and testing set. hint: use train_test_split !\n", "\n", - "# Your code here ..." + "# Your code here ...\n", + "x_train, x_test, y_train, y_test = train_test_split(\n", + " predictors_standardized, wine_df[\"class\"], train_size = 0.75, shuffle = True, stratify= wine_df[\"class\"], random_state = 123\n", + ")" ] }, { @@ -284,12 +651,1011 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 45, "id": "08818c64", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
GridSearchCV(cv=10, estimator=KNeighborsClassifier(),\n",
+       "             param_grid={'n_neighbors': range(1, 51)})
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "GridSearchCV(cv=10, estimator=KNeighborsClassifier(),\n", + " param_grid={'n_neighbors': range(1, 51)})" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Your code here..." + "# Your code here...\n", + "knn = KNeighborsClassifier()\n", + "parameter_grid = {\n", + " \"n_neighbors\" : range(1, 51)\n", + "}\n", + "wine_tune_grid = GridSearchCV(\n", + " estimator = knn,\n", + " param_grid = parameter_grid,\n", + " cv = 10 \n", + ")\n", + "wine_tune_grid.fit(x_train, y_train)" ] }, { @@ -305,12 +1671,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 46, "id": "ffefa9f2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Final Test Accuracy: 93.33%\n" + ] + } + ], "source": [ - "# Your code here..." + "# Your code here...\n", + "best_n_neighbors = wine_tune_grid.best_params_['n_neighbors']\n", + "knn_best = KNeighborsClassifier(n_neighbors = best_n_neighbors)\n", + "knn.fit(x_train, y_train)\n", + "y_pred = knn.predict(x_test)\n", + "final_accuracy = accuracy_score(y_test, y_pred)\n", + "print(f\"Final Test Accuracy: {final_accuracy:.2%}\")" ] }, { @@ -365,7 +1745,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.10.4", + "display_name": "lcr-env", "language": "python", "name": "python3" }, @@ -379,12 +1759,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.19" - }, - "vscode": { - "interpreter": { - "hash": "497a84dc8fec8cf8d24e7e87b6d954c9a18a327edc66feb9b9ea7e9e72cc5c7e" - } + "version": "3.11.15" } }, "nbformat": 4,