From e25483c9600049443e1e8f4813514b795a9da833 Mon Sep 17 00:00:00 2001 From: Serene Moussaoui Date: Tue, 21 Apr 2026 10:52:48 -0400 Subject: [PATCH 1/2] completed assignment 1 --- 02_activities/assignments/assignment_1.ipynb | 194 +++++++++++++++++-- 1 file changed, 177 insertions(+), 17 deletions(-) diff --git a/02_activities/assignments/assignment_1.ipynb b/02_activities/assignments/assignment_1.ipynb index 1d25bbcb3..efcbef428 100644 --- a/02_activities/assignments/assignment_1.ipynb +++ b/02_activities/assignments/assignment_1.ipynb @@ -96,7 +96,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Your answer here" + "wine_df.shape[0]\n", + "# 178" ] }, { @@ -114,7 +115,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Your answer here" + "wine_df.shape[1]\n", + "# 14" ] }, { @@ -132,7 +134,19 @@ "metadata": {}, "outputs": [], "source": [ - "# Your answer here" + "wine_df['class'].dtype\n", + "# integer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f0b5a193", + "metadata": {}, + "outputs": [], + "source": [ + "wine_df['class'].unique()\n", + "# 0, 1 and 2" ] }, { @@ -151,7 +165,8 @@ "metadata": {}, "outputs": [], "source": [ - "# Your answer here" + "wine_df.shape[1] - 1\n", + "# 13" ] }, { @@ -204,7 +219,7 @@ "id": "403ef0bb", "metadata": {}, "source": [ - "> Your answer here..." + "It is important to standardize the predictor variables because KNN is distance-based. Variables with larger scales will dominate the distance calculation and so standardization ensures that all the variables contribute equally." ] }, { @@ -220,7 +235,7 @@ "id": "fdee5a15", "metadata": {}, "source": [ - "> Your answer here..." + "The response variable Class is not numerical. It is categorical despite .dtype outputting integer (likely because the Class values are numbers referring to a particular category). Therefore we cannot compute distance calculations (meaningfully) and so scaling is not needed." 
] }, { @@ -236,7 +251,17 @@ "id": "f0676c21", "metadata": {}, "source": [ - "> Your answer here..." + "Setting a seed ensures reproducibility when someone else runs my code. The specific value doesn't matter so long as it is fixed. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0c6cc557", + "metadata": {}, + "outputs": [], + "source": [ + "np.random.seed(123) # I did not run this line since a seed will be set in the next few cells, but I included it here because it said to \"do so below\"." ] }, { @@ -249,6 +274,30 @@ "Extend the code to create a non-overlapping test set for the predictors and response variables." ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "654c31d6", + "metadata": {}, + "outputs": [], + "source": [ + "# Create a copy of the original 'wine_df' dataframe to ensure we're not modifying the original data\n", + "wine_scaled = predictors_standardized.copy()\n", + "wine_scaled[\"class\"] = wine_df[\"class\"] # need to add this column since predictors_standardized doesn't include class\n", + "wine_scaled" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "836f930c", + "metadata": {}, + "outputs": [], + "source": [ + "print(wine_scaled.describe())\n", + "# all the predictors seem to be on comparable scales." + ] + }, { "cell_type": "code", "execution_count": null, @@ -260,8 +309,19 @@ "np.random.seed(123)\n", "\n", "# split the data into a training and testing set. hint: use train_test_split !\n", + "wine_train, wine_test = train_test_split(\n", + " wine_scaled,\n", + " train_size=0.75,\n", + " shuffle=True,\n", + " stratify=wine_scaled[\"class\"]\n", + ")\n", "\n", - "# Your code here ..." 
+ "# create predictor and response training and test sets:\n", + "X_train = wine_train.drop(\"class\", axis=1)\n", + "y_train = wine_train[\"class\"]\n", + "\n", + "X_test = wine_test.drop(\"class\", axis=1)\n", + "y_test = wine_test[\"class\"]\n" ] }, { @@ -289,7 +349,61 @@ "metadata": {}, "outputs": [], "source": [ - "# Your code here..." + "# 1. Initialize the KNN classifier\n", + "knn = KNeighborsClassifier()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "45dfedef", + "metadata": {}, + "outputs": [], + "source": [ + "# 2. Define a parameter grid for n_neighbors ranging from 1 to 50\n", + "parameter_grid = {\n", + " \"n_neighbors\": range(1, 51),\n", + "}\n", + "\n", + "parameter_grid" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "94bd9d0f", + "metadata": {}, + "outputs": [], + "source": [ + "# 3. Implement a grid search using GridSearchCV with 10 fold cross validation to find the optimal number of neighbors\n", + "grid_search = GridSearchCV(\n", + " estimator=knn,\n", + " param_grid=parameter_grid,\n", + " cv=10\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4bb32f37", + "metadata": {}, + "outputs": [], + "source": [ + "# 4.1 Fit model on training data.\n", + "grid_search.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "721f7423", + "metadata": {}, + "outputs": [], + "source": [ + "# 4.2 Return the best n_neighbours\n", + "grid_search.best_params_\n", + "# the best n_neighbours is 7" ] }, { @@ -310,7 +424,58 @@ "metadata": {}, "outputs": [], "source": [ - "# Your code here..." 
+ "# assign the best k from grid search to a variable\n", + "best_k = grid_search.best_params_[\"n_neighbors\"]\n", + "best_k" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f958b9e6", + "metadata": {}, + "outputs": [], + "source": [ + "# initialize a new KNN model using best k\n", + "knn_final = KNeighborsClassifier(n_neighbors=best_k)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ea7d9b2", + "metadata": {}, + "outputs": [], + "source": [ + "# fit the model on training data\n", + "knn_final.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fdb01d9b", + "metadata": {}, + "outputs": [], + "source": [ + "# make predictions on the test set\n", + "y_pred = knn_final.predict(X_test)\n", + "\n", + "# add these predictions to the wine_test data set as a new column predicted_class\n", + "wine_test[\"predicted_class\"] = y_pred\n", + "wine_test" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2de5a14b", + "metadata": {}, + "outputs": [], + "source": [ + "# evaluate accuracy_score\n", + "accuracy_score(y_test, y_pred)\n", + "# score is 0.933" ] }, { @@ -365,7 +530,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.10.4", + "display_name": "lcr-env", "language": "python", "name": "python3" }, @@ -379,12 +544,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.19" - }, - "vscode": { - "interpreter": { - "hash": "497a84dc8fec8cf8d24e7e87b6d954c9a18a327edc66feb9b9ea7e9e72cc5c7e" - } + "version": "3.11.14" } }, "nbformat": 4, From 034de0c6a96945cacc7bb66ef97a3fa3b8c34881 Mon Sep 17 00:00:00 2001 From: Serene Moussaoui Date: Tue, 21 Apr 2026 10:55:20 -0400 Subject: [PATCH 2/2] adjustment to completed assignment 1 --- 02_activities/assignments/assignment_1.ipynb | 3656 +++++++++++++++++- 1 file changed, 3619 insertions(+), 37 deletions(-) diff --git a/02_activities/assignments/assignment_1.ipynb 
b/02_activities/assignments/assignment_1.ipynb index efcbef428..8b11ea64d 100644 --- a/02_activities/assignments/assignment_1.ipynb +++ b/02_activities/assignments/assignment_1.ipynb @@ -34,7 +34,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "4a3485d6-ba58-4660-a983-5680821c5719", "metadata": {}, "outputs": [], @@ -56,10 +56,288 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "a431d282-f9ca-4d5d-8912-71ffc9d8ea19", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
alcoholmalic_acidashalcalinity_of_ashmagnesiumtotal_phenolsflavanoidsnonflavanoid_phenolsproanthocyaninscolor_intensityhueod280/od315_of_diluted_winesprolineclass
014.231.712.4315.6127.02.803.060.282.295.641.043.921065.00
113.201.782.1411.2100.02.652.760.261.284.381.053.401050.00
213.162.362.6718.6101.02.803.240.302.815.681.033.171185.00
314.371.952.5016.8113.03.853.490.242.187.800.863.451480.00
413.242.592.8721.0118.02.802.690.391.824.321.042.93735.00
.............................................
17313.715.652.4520.595.01.680.610.521.067.700.641.74740.02
17413.403.912.4823.0102.01.800.750.431.417.300.701.56750.02
17513.274.282.2620.0120.01.590.690.431.3510.200.591.56835.02
17613.172.592.3720.0120.01.650.680.531.469.300.601.62840.02
17714.134.102.7424.596.02.050.760.561.359.200.611.60560.02
\n", + "

178 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " alcohol malic_acid ash alcalinity_of_ash magnesium total_phenols \\\n", + "0 14.23 1.71 2.43 15.6 127.0 2.80 \n", + "1 13.20 1.78 2.14 11.2 100.0 2.65 \n", + "2 13.16 2.36 2.67 18.6 101.0 2.80 \n", + "3 14.37 1.95 2.50 16.8 113.0 3.85 \n", + "4 13.24 2.59 2.87 21.0 118.0 2.80 \n", + ".. ... ... ... ... ... ... \n", + "173 13.71 5.65 2.45 20.5 95.0 1.68 \n", + "174 13.40 3.91 2.48 23.0 102.0 1.80 \n", + "175 13.27 4.28 2.26 20.0 120.0 1.59 \n", + "176 13.17 2.59 2.37 20.0 120.0 1.65 \n", + "177 14.13 4.10 2.74 24.5 96.0 2.05 \n", + "\n", + " flavanoids nonflavanoid_phenols proanthocyanins color_intensity hue \\\n", + "0 3.06 0.28 2.29 5.64 1.04 \n", + "1 2.76 0.26 1.28 4.38 1.05 \n", + "2 3.24 0.30 2.81 5.68 1.03 \n", + "3 3.49 0.24 2.18 7.80 0.86 \n", + "4 2.69 0.39 1.82 4.32 1.04 \n", + ".. ... ... ... ... ... \n", + "173 0.61 0.52 1.06 7.70 0.64 \n", + "174 0.75 0.43 1.41 7.30 0.70 \n", + "175 0.69 0.43 1.35 10.20 0.59 \n", + "176 0.68 0.53 1.46 9.30 0.60 \n", + "177 0.76 0.56 1.35 9.20 0.61 \n", + "\n", + " od280/od315_of_diluted_wines proline class \n", + "0 3.92 1065.0 0 \n", + "1 3.40 1050.0 0 \n", + "2 3.17 1185.0 0 \n", + "3 3.45 1480.0 0 \n", + "4 2.93 735.0 0 \n", + ".. ... ... ... 
\n", + "173 1.74 740.0 2 \n", + "174 1.56 750.0 2 \n", + "175 1.56 835.0 2 \n", + "176 1.62 840.0 2 \n", + "177 1.60 560.0 2 \n", + "\n", + "[178 rows x 14 columns]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from sklearn.datasets import load_wine\n", "\n", @@ -91,10 +369,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "56916892", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "178" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "wine_df.shape[0]\n", "# 178" @@ -110,10 +399,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "df0ef103", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "14" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "wine_df.shape[1]\n", "# 14" @@ -129,10 +429,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "47989426", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "dtype('int64')" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "wine_df['class'].dtype\n", "# integer" @@ -140,10 +451,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "f0b5a193", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 1, 2])" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "wine_df['class'].unique()\n", "# 0, 1 and 2" @@ -160,10 +482,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "bd7b0910", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "13" + ] + }, + "execution_count": 7, + "metadata": {}, + 
"output_type": "execute_result" + } + ], "source": [ "wine_df.shape[1] - 1\n", "# 13" @@ -190,10 +523,37 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "cc899b59", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " alcohol malic_acid ash alcalinity_of_ash magnesium \\\n", + "0 1.518613 -0.562250 0.232053 -1.169593 1.913905 \n", + "1 0.246290 -0.499413 -0.827996 -2.490847 0.018145 \n", + "2 0.196879 0.021231 1.109334 -0.268738 0.088358 \n", + "3 1.691550 -0.346811 0.487926 -0.809251 0.930918 \n", + "4 0.295700 0.227694 1.840403 0.451946 1.281985 \n", + "\n", + " total_phenols flavanoids nonflavanoid_phenols proanthocyanins \\\n", + "0 0.808997 1.034819 -0.659563 1.224884 \n", + "1 0.568648 0.733629 -0.820719 -0.544721 \n", + "2 0.808997 1.215533 -0.498407 2.135968 \n", + "3 2.491446 1.466525 -0.981875 1.032155 \n", + "4 0.808997 0.663351 0.226796 0.401404 \n", + "\n", + " color_intensity hue od280/od315_of_diluted_wines proline \n", + "0 0.251717 0.362177 1.847920 1.013009 \n", + "1 -0.293321 0.406051 1.113449 0.965242 \n", + "2 0.269020 0.318304 0.788587 1.395148 \n", + "3 1.186068 -0.427544 1.184071 2.334574 \n", + "4 -0.319276 0.362177 0.449601 -0.037874 \n" + ] + } + ], "source": [ "# Select predictors (excluding the last column)\n", "predictors = wine_df.iloc[:, :-1]\n", @@ -276,10 +636,288 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "654c31d6", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
alcoholmalic_acidashalcalinity_of_ashmagnesiumtotal_phenolsflavanoidsnonflavanoid_phenolsproanthocyaninscolor_intensityhueod280/od315_of_diluted_winesprolineclass
01.518613-0.5622500.232053-1.1695931.9139050.8089971.034819-0.6595631.2248840.2517170.3621771.8479201.0130090
10.246290-0.499413-0.827996-2.4908470.0181450.5686480.733629-0.820719-0.544721-0.2933210.4060511.1134490.9652420
20.1968790.0212311.109334-0.2687380.0883580.8089971.215533-0.4984072.1359680.2690200.3183040.7885871.3951480
31.691550-0.3468110.487926-0.8092510.9309182.4914461.466525-0.9818751.0321551.186068-0.4275441.1840712.3345740
40.2957000.2276941.8404030.4519461.2819850.8089970.6633510.2267960.401404-0.3192760.3621770.449601-0.0378740
.............................................
1730.8762752.9745430.3051590.301803-0.332922-0.985614-1.4249001.274310-0.9301791.142811-1.392758-1.231206-0.0219522
1740.4933431.4126090.4148201.0525160.158572-0.793334-1.2843440.549108-0.3169500.969783-1.129518-1.4854450.0098932
1750.3327581.744744-0.3893550.1516611.422412-1.129824-1.3445820.549108-0.4220752.224236-1.612125-1.4854450.2805752
1760.2092320.2276940.0127320.1516611.422412-1.033684-1.3546221.354888-0.2293461.834923-1.568252-1.4006990.2964982
1771.3950861.5831651.3652081.502943-0.262708-0.392751-1.2743051.596623-0.4220751.791666-1.524378-1.428948-0.5951602
\n", + "

178 rows × 14 columns

\n", + "
" + ], + "text/plain": [ + " alcohol malic_acid ash alcalinity_of_ash magnesium \\\n", + "0 1.518613 -0.562250 0.232053 -1.169593 1.913905 \n", + "1 0.246290 -0.499413 -0.827996 -2.490847 0.018145 \n", + "2 0.196879 0.021231 1.109334 -0.268738 0.088358 \n", + "3 1.691550 -0.346811 0.487926 -0.809251 0.930918 \n", + "4 0.295700 0.227694 1.840403 0.451946 1.281985 \n", + ".. ... ... ... ... ... \n", + "173 0.876275 2.974543 0.305159 0.301803 -0.332922 \n", + "174 0.493343 1.412609 0.414820 1.052516 0.158572 \n", + "175 0.332758 1.744744 -0.389355 0.151661 1.422412 \n", + "176 0.209232 0.227694 0.012732 0.151661 1.422412 \n", + "177 1.395086 1.583165 1.365208 1.502943 -0.262708 \n", + "\n", + " total_phenols flavanoids nonflavanoid_phenols proanthocyanins \\\n", + "0 0.808997 1.034819 -0.659563 1.224884 \n", + "1 0.568648 0.733629 -0.820719 -0.544721 \n", + "2 0.808997 1.215533 -0.498407 2.135968 \n", + "3 2.491446 1.466525 -0.981875 1.032155 \n", + "4 0.808997 0.663351 0.226796 0.401404 \n", + ".. ... ... ... ... \n", + "173 -0.985614 -1.424900 1.274310 -0.930179 \n", + "174 -0.793334 -1.284344 0.549108 -0.316950 \n", + "175 -1.129824 -1.344582 0.549108 -0.422075 \n", + "176 -1.033684 -1.354622 1.354888 -0.229346 \n", + "177 -0.392751 -1.274305 1.596623 -0.422075 \n", + "\n", + " color_intensity hue od280/od315_of_diluted_wines proline class \n", + "0 0.251717 0.362177 1.847920 1.013009 0 \n", + "1 -0.293321 0.406051 1.113449 0.965242 0 \n", + "2 0.269020 0.318304 0.788587 1.395148 0 \n", + "3 1.186068 -0.427544 1.184071 2.334574 0 \n", + "4 -0.319276 0.362177 0.449601 -0.037874 0 \n", + ".. ... ... ... ... ... 
\n", + "173 1.142811 -1.392758 -1.231206 -0.021952 2 \n", + "174 0.969783 -1.129518 -1.485445 0.009893 2 \n", + "175 2.224236 -1.612125 -1.485445 0.280575 2 \n", + "176 1.834923 -1.568252 -1.400699 0.296498 2 \n", + "177 1.791666 -1.524378 -1.428948 -0.595160 2 \n", + "\n", + "[178 rows x 14 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Create a copy of the original 'wine_df' dataframe to ensure we're not modifying the original data\n", "wine_scaled = predictors_standardized.copy()\n", @@ -289,10 +927,56 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "836f930c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " alcohol malic_acid ash alcalinity_of_ash \\\n", + "count 1.780000e+02 1.780000e+02 1.780000e+02 1.780000e+02 \n", + "mean -8.382808e-16 -1.197544e-16 -8.370333e-16 -3.991813e-17 \n", + "std 1.002821e+00 1.002821e+00 1.002821e+00 1.002821e+00 \n", + "min -2.434235e+00 -1.432983e+00 -3.679162e+00 -2.671018e+00 \n", + "25% -7.882448e-01 -6.587486e-01 -5.721225e-01 -6.891372e-01 \n", + "50% 6.099988e-02 -4.231120e-01 -2.382132e-02 1.518295e-03 \n", + "75% 8.361286e-01 6.697929e-01 6.981085e-01 6.020883e-01 \n", + "max 2.259772e+00 3.109192e+00 3.156325e+00 3.154511e+00 \n", + "\n", + " magnesium total_phenols flavanoids nonflavanoid_phenols \\\n", + "count 1.780000e+02 178.000000 1.780000e+02 1.780000e+02 \n", + "mean -3.991813e-17 0.000000 -3.991813e-16 3.592632e-16 \n", + "std 1.002821e+00 1.002821 1.002821e+00 1.002821e+00 \n", + "min -2.088255e+00 -2.107246 -1.695971e+00 -1.868234e+00 \n", + "25% -8.244151e-01 -0.885468 -8.275393e-01 -7.401412e-01 \n", + "50% -1.222817e-01 0.095960 1.061497e-01 -1.760948e-01 \n", + "75% 5.096384e-01 0.808997 8.490851e-01 6.095413e-01 \n", + "max 4.371372e+00 2.539515 3.062832e+00 2.402403e+00 \n", + "\n", + " proanthocyanins color_intensity hue 
\\\n", + "count 1.780000e+02 1.780000e+02 1.780000e+02 \n", + "mean -1.197544e-16 2.494883e-17 1.995907e-16 \n", + "std 1.002821e+00 1.002821e+00 1.002821e+00 \n", + "min -2.069034e+00 -1.634288e+00 -2.094732e+00 \n", + "25% -5.972835e-01 -7.951025e-01 -7.675624e-01 \n", + "50% -6.289785e-02 -1.592246e-01 3.312687e-02 \n", + "75% 6.291754e-01 4.939560e-01 7.131644e-01 \n", + "max 3.485073e+00 3.435432e+00 3.301694e+00 \n", + "\n", + " od280/od315_of_diluted_wines proline class \n", + "count 1.780000e+02 1.780000e+02 178.000000 \n", + "mean 3.193450e-16 -1.596725e-16 0.938202 \n", + "std 1.002821e+00 1.002821e+00 0.775035 \n", + "min -1.895054e+00 -1.493188e+00 0.000000 \n", + "25% -9.522483e-01 -7.846378e-01 0.000000 \n", + "50% 2.377348e-01 -2.337204e-01 1.000000 \n", + "75% 7.885875e-01 7.582494e-01 2.000000 \n", + "max 1.960915e+00 2.971473e+00 2.000000 \n" + ] + } + ], "source": [ "print(wine_scaled.describe())\n", "# all the predictors seem to be on comparable scales." @@ -300,7 +984,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "72c101f2", "metadata": {}, "outputs": [], @@ -344,7 +1028,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "08818c64", "metadata": {}, "outputs": [], @@ -355,10 +1039,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "45dfedef", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'n_neighbors': range(1, 51)}" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# 2. 
Define a parameter grid for n_neighbors ranging from 1 to 50\n", "parameter_grid = {\n", @@ -370,7 +1065,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "94bd9d0f", "metadata": {}, "outputs": [], @@ -385,10 +1080,999 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "4bb32f37", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
GridSearchCV(cv=10, estimator=KNeighborsClassifier(),\n",
+       "             param_grid={'n_neighbors': range(1, 51)})
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "GridSearchCV(cv=10, estimator=KNeighborsClassifier(),\n", + " param_grid={'n_neighbors': range(1, 51)})" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# 4.1 Fit model on training data.\n", "grid_search.fit(X_train, y_train)" @@ -396,10 +2080,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "721f7423", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'n_neighbors': 7}" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# 4.2 Return the best n_neighbours\n", "grid_search.best_params_\n", @@ -419,10 +2114,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "ffefa9f2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "7" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# assign the best k from grid search to a variable\n", "best_k = grid_search.best_params_[\"n_neighbors\"]\n", @@ -431,7 +2137,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "f958b9e6", "metadata": {}, "outputs": [], @@ -442,10 +2148,827 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "6ea7d9b2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
KNeighborsClassifier(n_neighbors=7)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "KNeighborsClassifier(n_neighbors=7)" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# fit the model on training data\n", "knn_final.fit(X_train, y_train)" @@ -453,10 +2976,1058 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "id": "fdb01d9b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
alcoholmalic_acidashalcalinity_of_ashmagnesiumtotal_phenolsflavanoidsnonflavanoid_phenolsproanthocyaninscolor_intensityhueod280/od315_of_diluted_winesprolineclasspredicted_class
102-0.8160380.1020210.3417130.451946-0.1222820.4244380.081051-0.176095-0.492158-0.976782-0.6907841.085200-0.98366911
84-1.433671-1.2983340.780354-0.448909-0.403135-0.1524020.181447-1.1430311.330009-0.868639-0.7346570.661468-0.72254011
96-1.470729-0.1942081.3652080.6020882.405399-1.113800-1.043392-1.787656-0.054137-1.106553-0.032683-0.496736-0.38816812
65-0.778980-1.0110810.707247-0.418881-0.1222820.2001110.6231930.0656390.856946-0.1981561.020278-0.440238-0.21939010
79-0.3713431.3767030.1223921.0525160.0883580.8570670.5227960.5491080.629175-1.0762731.0202780.732090-0.90405611
171.024507-0.6879230.9265670.1516611.0713451.0493471.3761680.3073740.2261960.6669840.757038-0.0588781.22000100
109-1.717782-0.8854091.2189950.151661-0.4031350.7128580.894264-0.5789851.575301-1.0416670.0111900.915707-0.21302111
113-1.964835-1.4329830.4879260.451946-0.8244150.296251-0.0193460.468530-0.264388-0.8556620.625418-0.426113-0.99640711
281.073917-0.3916941.584529-0.0285100.5096381.0493470.9444620.0656390.296279-0.2414131.2835181.1134490.53533500
1590.592164-0.5981560.9996740.902373-0.7542020.488531-0.9329561.2743101.2248842.894719-1.699872-1.174708-0.40409122
380.085705-0.750759-0.974210-1.199622-0.1222820.1680650.613153-0.659563-0.387033-0.5874690.9764050.1106150.86970700
340.629222-0.4814601.036228-0.1486240.7202780.0879480.502717-0.578985-0.089179-0.3711840.6254180.3648551.10854400
125-1.149560-0.158301-0.7183360.451946-1.0350550.4885310.6231930.065639-0.422075-0.994085-0.4275440.943956-1.17473811
115-2.434235-0.741782-0.6086760.602088-1.0350550.2642050.1412891.2743100.734300-1.3660953.3016940.364855-1.08238811
711.061565-0.7417821.1093341.653086-0.9648421.0493470.834026-1.2236100.489009-0.7258911.7661260.774463-1.07283411
760.036295-1.289357-2.399793-1.049479-0.964842-0.5529840.000733-0.981875-0.229346-0.1981561.020278-0.185998-1.13015511
131-0.1489950.5867590.1223920.1516610.298998-1.594500-0.812480-0.981875-1.3331590.147900-0.954024-1.683187-0.69069522
330.938038-0.7238291.2189950.0015182.2649721.0493470.7135501.113154-0.4220750.1479001.2835180.5484721.55437300
60-0.828391-1.109824-0.316249-1.0494790.088358-0.392751-0.9429952.160669-2.069034-0.7734741.283518-1.330077-0.21302111
190.7898060.6855020.707247-1.2897071.1415580.6487641.004700-1.5459220.1210710.0181290.0111901.0569520.31242000
114-1.137207-0.8495020.4879260.902373-1.1052680.4244380.2617650.549108-0.965221-0.933525-0.1204300.816836-1.15244711
471.110975-0.589180-0.901103-1.0494790.0883581.2896971.366128-1.2236100.9620710.450699-0.2081771.0145780.75824900
481.358028-0.2839740.122392-0.2086810.2287850.7288810.894264-0.3372511.3825720.4939560.4937970.1953610.99708600
1581.654492-0.5891801.2189951.653086-0.1222820.808997-0.7221231.3548881.9432383.435432-1.699872-0.920468-0.27671122
133-0.3713431.089450-0.0238210.6020880.439425-0.953567-0.832559-1.545922-1.315638-0.025128-0.778531-1.866805-0.46778122
137-0.5813382.8488700.9996741.653086-0.262708-0.809357-1.4349392.160669-0.860096-0.025128-0.603037-1.301828-0.73846322
154-0.519575-0.939268-0.9742100.1516610.228785-1.306080-1.4550191.354888-0.3344711.099554-1.655999-1.499570-0.34040122
136-0.9272122.1397160.6341400.451946-0.754202-1.466313-1.5654551.354888-1.385721-0.522583-0.910151-1.895054-0.08564122
20.1968790.0212311.109334-0.2687380.0883580.8089971.215533-0.4984072.1359680.2690200.3183040.7885871.39514800
1680.7156900.2187171.1824411.5029430.369212-1.193917-1.1939870.226796-0.0891791.558078-0.954024-1.1464590.00989322
117-0.717217-0.652016-0.6452290.9023730.579852-0.4728680.060971-0.1760950.033467-1.2968840.4499240.491974-1.27982711
320.839217-0.454530-0.023821-0.6891370.2989980.2001110.6633510.4685300.664217-0.5269091.1957720.3648550.77417200
220.876275-0.427600-0.023821-0.8693080.0883580.5045540.854105-0.7401410.173634-0.5442120.6692911.9609150.91747400
108-0.964270-0.939268-1.559065-0.148624-0.5435620.1039720.0107730.2267960.856946-1.020039-0.4275440.576721-1.38491511
73-0.013116-0.5981560.8534603.1545112.7564651.6101630.864145-1.2236100.646696-0.7388681.5467591.2546940.75824910
77-1.4336710.496993-0.499016-0.4489090.860705-0.921521-0.7120830.549108-1.122909-1.0416670.011190-0.129500-0.78623011
1420.6415740.7483381.2921011.202658-0.192495-1.193917-1.5152571.113154-1.823742-0.306298-0.295924-0.779224-0.72254022
91.061565-0.885409-0.352802-1.049479-0.1222821.0974171.125176-1.1430310.4539670.9351770.2305571.3253160.94931900
85-0.408401-1.217544-0.462462-0.448909-0.052068-0.152402-0.089624-0.498407-0.229346-1.0546441.1957720.774463-0.94545511
580.888627-0.8135950.487926-0.8392800.5798521.7703961.647239-1.3847660.7868630.753498-0.2959240.3648551.71359800
451.4939071.5293050.268606-0.1786530.7904920.8891140.623193-0.498407-0.5972840.078689-0.3836701.0145781.06077600
1750.3327581.744744-0.3893550.1516611.422412-1.129824-1.3445820.549108-0.4220752.224236-1.612125-1.4854450.28057522
421.086270-0.4006700.816907-1.3497640.0883581.5300471.536802-1.5459220.1911540.160877-0.3397971.3394401.10854400
1430.7651012.346179-0.0603750.151661-0.543562-0.472868-1.2341460.871420-1.000263-0.284670-0.208177-0.793348-0.62700522
1771.3950861.5831651.3652081.502943-0.262708-0.392751-1.2743051.596623-0.4220751.791666-1.524378-1.428948-0.59516022
\n", + "
" + ], + "text/plain": [ + " alcohol malic_acid ash alcalinity_of_ash magnesium \\\n", + "102 -0.816038 0.102021 0.341713 0.451946 -0.122282 \n", + "84 -1.433671 -1.298334 0.780354 -0.448909 -0.403135 \n", + "96 -1.470729 -0.194208 1.365208 0.602088 2.405399 \n", + "65 -0.778980 -1.011081 0.707247 -0.418881 -0.122282 \n", + "79 -0.371343 1.376703 0.122392 1.052516 0.088358 \n", + "17 1.024507 -0.687923 0.926567 0.151661 1.071345 \n", + "109 -1.717782 -0.885409 1.218995 0.151661 -0.403135 \n", + "113 -1.964835 -1.432983 0.487926 0.451946 -0.824415 \n", + "28 1.073917 -0.391694 1.584529 -0.028510 0.509638 \n", + "159 0.592164 -0.598156 0.999674 0.902373 -0.754202 \n", + "38 0.085705 -0.750759 -0.974210 -1.199622 -0.122282 \n", + "34 0.629222 -0.481460 1.036228 -0.148624 0.720278 \n", + "125 -1.149560 -0.158301 -0.718336 0.451946 -1.035055 \n", + "115 -2.434235 -0.741782 -0.608676 0.602088 -1.035055 \n", + "71 1.061565 -0.741782 1.109334 1.653086 -0.964842 \n", + "76 0.036295 -1.289357 -2.399793 -1.049479 -0.964842 \n", + "131 -0.148995 0.586759 0.122392 0.151661 0.298998 \n", + "33 0.938038 -0.723829 1.218995 0.001518 2.264972 \n", + "60 -0.828391 -1.109824 -0.316249 -1.049479 0.088358 \n", + "19 0.789806 0.685502 0.707247 -1.289707 1.141558 \n", + "114 -1.137207 -0.849502 0.487926 0.902373 -1.105268 \n", + "47 1.110975 -0.589180 -0.901103 -1.049479 0.088358 \n", + "48 1.358028 -0.283974 0.122392 -0.208681 0.228785 \n", + "158 1.654492 -0.589180 1.218995 1.653086 -0.122282 \n", + "133 -0.371343 1.089450 -0.023821 0.602088 0.439425 \n", + "137 -0.581338 2.848870 0.999674 1.653086 -0.262708 \n", + "154 -0.519575 -0.939268 -0.974210 0.151661 0.228785 \n", + "136 -0.927212 2.139716 0.634140 0.451946 -0.754202 \n", + "2 0.196879 0.021231 1.109334 -0.268738 0.088358 \n", + "168 0.715690 0.218717 1.182441 1.502943 0.369212 \n", + "117 -0.717217 -0.652016 -0.645229 0.902373 0.579852 \n", + "32 0.839217 -0.454530 -0.023821 -0.689137 0.298998 \n", + "22 0.876275 -0.427600 
-0.023821 -0.869308 0.088358 \n", + "108 -0.964270 -0.939268 -1.559065 -0.148624 -0.543562 \n", + "73 -0.013116 -0.598156 0.853460 3.154511 2.756465 \n", + "77 -1.433671 0.496993 -0.499016 -0.448909 0.860705 \n", + "142 0.641574 0.748338 1.292101 1.202658 -0.192495 \n", + "9 1.061565 -0.885409 -0.352802 -1.049479 -0.122282 \n", + "85 -0.408401 -1.217544 -0.462462 -0.448909 -0.052068 \n", + "58 0.888627 -0.813595 0.487926 -0.839280 0.579852 \n", + "45 1.493907 1.529305 0.268606 -0.178653 0.790492 \n", + "175 0.332758 1.744744 -0.389355 0.151661 1.422412 \n", + "42 1.086270 -0.400670 0.816907 -1.349764 0.088358 \n", + "143 0.765101 2.346179 -0.060375 0.151661 -0.543562 \n", + "177 1.395086 1.583165 1.365208 1.502943 -0.262708 \n", + "\n", + " total_phenols flavanoids nonflavanoid_phenols proanthocyanins \\\n", + "102 0.424438 0.081051 -0.176095 -0.492158 \n", + "84 -0.152402 0.181447 -1.143031 1.330009 \n", + "96 -1.113800 -1.043392 -1.787656 -0.054137 \n", + "65 0.200111 0.623193 0.065639 0.856946 \n", + "79 0.857067 0.522796 0.549108 0.629175 \n", + "17 1.049347 1.376168 0.307374 0.226196 \n", + "109 0.712858 0.894264 -0.578985 1.575301 \n", + "113 0.296251 -0.019346 0.468530 -0.264388 \n", + "28 1.049347 0.944462 0.065639 0.296279 \n", + "159 0.488531 -0.932956 1.274310 1.224884 \n", + "38 0.168065 0.613153 -0.659563 -0.387033 \n", + "34 0.087948 0.502717 -0.578985 -0.089179 \n", + "125 0.488531 0.623193 0.065639 -0.422075 \n", + "115 0.264205 0.141289 1.274310 0.734300 \n", + "71 1.049347 0.834026 -1.223610 0.489009 \n", + "76 -0.552984 0.000733 -0.981875 -0.229346 \n", + "131 -1.594500 -0.812480 -0.981875 -1.333159 \n", + "33 1.049347 0.713550 1.113154 -0.422075 \n", + "60 -0.392751 -0.942995 2.160669 -2.069034 \n", + "19 0.648764 1.004700 -1.545922 0.121071 \n", + "114 0.424438 0.261765 0.549108 -0.965221 \n", + "47 1.289697 1.366128 -1.223610 0.962071 \n", + "48 0.728881 0.894264 -0.337251 1.382572 \n", + "158 0.808997 -0.722123 1.354888 1.943238 \n", + "133 
-0.953567 -0.832559 -1.545922 -1.315638 \n", + "137 -0.809357 -1.434939 2.160669 -0.860096 \n", + "154 -1.306080 -1.455019 1.354888 -0.334471 \n", + "136 -1.466313 -1.565455 1.354888 -1.385721 \n", + "2 0.808997 1.215533 -0.498407 2.135968 \n", + "168 -1.193917 -1.193987 0.226796 -0.089179 \n", + "117 -0.472868 0.060971 -0.176095 0.033467 \n", + "32 0.200111 0.663351 0.468530 0.664217 \n", + "22 0.504554 0.854105 -0.740141 0.173634 \n", + "108 0.103972 0.010773 0.226796 0.856946 \n", + "73 1.610163 0.864145 -1.223610 0.646696 \n", + "77 -0.921521 -0.712083 0.549108 -1.122909 \n", + "142 -1.193917 -1.515257 1.113154 -1.823742 \n", + "9 1.097417 1.125176 -1.143031 0.453967 \n", + "85 -0.152402 -0.089624 -0.498407 -0.229346 \n", + "58 1.770396 1.647239 -1.384766 0.786863 \n", + "45 0.889114 0.623193 -0.498407 -0.597284 \n", + "175 -1.129824 -1.344582 0.549108 -0.422075 \n", + "42 1.530047 1.536802 -1.545922 0.191154 \n", + "143 -0.472868 -1.234146 0.871420 -1.000263 \n", + "177 -0.392751 -1.274305 1.596623 -0.422075 \n", + "\n", + " color_intensity hue od280/od315_of_diluted_wines proline class \\\n", + "102 -0.976782 -0.690784 1.085200 -0.983669 1 \n", + "84 -0.868639 -0.734657 0.661468 -0.722540 1 \n", + "96 -1.106553 -0.032683 -0.496736 -0.388168 1 \n", + "65 -0.198156 1.020278 -0.440238 -0.219390 1 \n", + "79 -1.076273 1.020278 0.732090 -0.904056 1 \n", + "17 0.666984 0.757038 -0.058878 1.220001 0 \n", + "109 -1.041667 0.011190 0.915707 -0.213021 1 \n", + "113 -0.855662 0.625418 -0.426113 -0.996407 1 \n", + "28 -0.241413 1.283518 1.113449 0.535335 0 \n", + "159 2.894719 -1.699872 -1.174708 -0.404091 2 \n", + "38 -0.587469 0.976405 0.110615 0.869707 0 \n", + "34 -0.371184 0.625418 0.364855 1.108544 0 \n", + "125 -0.994085 -0.427544 0.943956 -1.174738 1 \n", + "115 -1.366095 3.301694 0.364855 -1.082388 1 \n", + "71 -0.725891 1.766126 0.774463 -1.072834 1 \n", + "76 -0.198156 1.020278 -0.185998 -1.130155 1 \n", + "131 0.147900 -0.954024 -1.683187 -0.690695 2 \n", + 
"33 0.147900 1.283518 0.548472 1.554373 0 \n", + "60 -0.773474 1.283518 -1.330077 -0.213021 1 \n", + "19 0.018129 0.011190 1.056952 0.312420 0 \n", + "114 -0.933525 -0.120430 0.816836 -1.152447 1 \n", + "47 0.450699 -0.208177 1.014578 0.758249 0 \n", + "48 0.493956 0.493797 0.195361 0.997086 0 \n", + "158 3.435432 -1.699872 -0.920468 -0.276711 2 \n", + "133 -0.025128 -0.778531 -1.866805 -0.467781 2 \n", + "137 -0.025128 -0.603037 -1.301828 -0.738463 2 \n", + "154 1.099554 -1.655999 -1.499570 -0.340401 2 \n", + "136 -0.522583 -0.910151 -1.895054 -0.085641 2 \n", + "2 0.269020 0.318304 0.788587 1.395148 0 \n", + "168 1.558078 -0.954024 -1.146459 0.009893 2 \n", + "117 -1.296884 0.449924 0.491974 -1.279827 1 \n", + "32 -0.526909 1.195772 0.364855 0.774172 0 \n", + "22 -0.544212 0.669291 1.960915 0.917474 0 \n", + "108 -1.020039 -0.427544 0.576721 -1.384915 1 \n", + "73 -0.738868 1.546759 1.254694 0.758249 1 \n", + "77 -1.041667 0.011190 -0.129500 -0.786230 1 \n", + "142 -0.306298 -0.295924 -0.779224 -0.722540 2 \n", + "9 0.935177 0.230557 1.325316 0.949319 0 \n", + "85 -1.054644 1.195772 0.774463 -0.945455 1 \n", + "58 0.753498 -0.295924 0.364855 1.713598 0 \n", + "45 0.078689 -0.383670 1.014578 1.060776 0 \n", + "175 2.224236 -1.612125 -1.485445 0.280575 2 \n", + "42 0.160877 -0.339797 1.339440 1.108544 0 \n", + "143 -0.284670 -0.208177 -0.793348 -0.627005 2 \n", + "177 1.791666 -1.524378 -1.428948 -0.595160 2 \n", + "\n", + " predicted_class \n", + "102 1 \n", + "84 1 \n", + "96 2 \n", + "65 0 \n", + "79 1 \n", + "17 0 \n", + "109 1 \n", + "113 1 \n", + "28 0 \n", + "159 2 \n", + "38 0 \n", + "34 0 \n", + "125 1 \n", + "115 1 \n", + "71 1 \n", + "76 1 \n", + "131 2 \n", + "33 0 \n", + "60 1 \n", + "19 0 \n", + "114 1 \n", + "47 0 \n", + "48 0 \n", + "158 2 \n", + "133 2 \n", + "137 2 \n", + "154 2 \n", + "136 2 \n", + "2 0 \n", + "168 2 \n", + "117 1 \n", + "32 0 \n", + "22 0 \n", + "108 1 \n", + "73 0 \n", + "77 1 \n", + "142 2 \n", + "9 0 \n", + "85 1 \n", + "58 0 
\n", + "45 0 \n", + "175 2 \n", + "42 0 \n", + "143 2 \n", + "177 2 " + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# make predictions on the test set\n", "y_pred = knn_final.predict(X_test)\n", @@ -468,10 +4039,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "id": "2de5a14b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.9333333333333333" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# evaluate accuracy_score\n", "accuracy_score(y_test, y_pred)\n",