diff --git a/DataCleaning/suzzzal_data_imputation/data_imputation.ipynb b/DataCleaning/suzzzal_data_imputation/data_imputation.ipynb
new file mode 100644
index 0000000..29a79a4
--- /dev/null
+++ b/DataCleaning/suzzzal_data_imputation/data_imputation.ipynb
@@ -0,0 +1,796 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "1240844f",
+   "metadata": {},
+   "source": [
+    " ",
+    "\n",
+    " ",
+    " "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cb43cadf",
+   "metadata": {},
+   "source": [
+    " ",
+    " ",
+    "\n",
+    " "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "ef0adb0e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from sklearn.compose import ColumnTransformer\n",
+    "from sklearn.pipeline import Pipeline\n",
+    "from sklearn.preprocessing import OneHotEncoder, StandardScaler\n",
+    "from sklearn.impute import SimpleImputer, KNNImputer"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "2d2ead02",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>P_NAME</th>\n",
+       "      <th>P_STATUS</th>\n",
+       "      <th>P_MASS</th>\n",
+       "      <th>P_MASS_ERROR_MIN</th>\n",
+       "      <th>P_MASS_ERROR_MAX</th>\n",
+       "      <th>P_RADIUS</th>\n",
+       "      <th>P_RADIUS_ERROR_MIN</th>\n",
+       "      <th>P_RADIUS_ERROR_MAX</th>\n",
+       "      <th>P_YEAR</th>\n",
+       "      <th>P_UPDATED</th>\n",
+       "      <th>...</th>\n",
+       "      <th>P_HABZONE_CON</th>\n",
+       "      <th>P_TYPE_TEMP</th>\n",
+       "      <th>P_HABITABLE</th>\n",
+       "      <th>P_ESI</th>\n",
+       "      <th>S_CONSTELLATION</th>\n",
+       "      <th>S_CONSTELLATION_ABR</th>\n",
+       "      <th>S_CONSTELLATION_ENG</th>\n",
+       "      <th>P_RADIUS_EST</th>\n",
+       "      <th>P_MASS_EST</th>\n",
+       "      <th>P_SEMI_MAJOR_AXIS_EST</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>11 Com b</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>6165.86330</td>\n",
+       "      <td>-476.74200</td>\n",
+       "      <td>476.74200</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2007</td>\n",
+       "      <td>2014-05-14</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0</td>\n",
+       "      <td>Hot</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.083813</td>\n",
+       "      <td>Coma Berenices</td>\n",
+       "      <td>Com</td>\n",
+       "      <td>Berenice's Hair</td>\n",
+       "      <td>12.082709</td>\n",
+       "      <td>6165.86330</td>\n",
+       "      <td>1.29</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>11 UMi b</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>4684.78480</td>\n",
+       "      <td>-794.57001</td>\n",
+       "      <td>794.57001</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2009</td>\n",
+       "      <td>2018-09-06</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0</td>\n",
+       "      <td>Hot</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.082414</td>\n",
+       "      <td>Ursa Minor</td>\n",
+       "      <td>UMi</td>\n",
+       "      <td>Little Bear</td>\n",
+       "      <td>12.229641</td>\n",
+       "      <td>4684.78480</td>\n",
+       "      <td>1.53</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>14 And b</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>1525.57440</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2008</td>\n",
+       "      <td>2014-05-14</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0</td>\n",
+       "      <td>Hot</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.081917</td>\n",
+       "      <td>Andromeda</td>\n",
+       "      <td>And</td>\n",
+       "      <td>Andromeda</td>\n",
+       "      <td>12.848516</td>\n",
+       "      <td>1525.57440</td>\n",
+       "      <td>0.83</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>14 Her b</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>1481.07850</td>\n",
+       "      <td>-47.67420</td>\n",
+       "      <td>47.67420</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>2002</td>\n",
+       "      <td>2018-09-06</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0</td>\n",
+       "      <td>Cold</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.145241</td>\n",
+       "      <td>Hercules</td>\n",
+       "      <td>Her</td>\n",
+       "      <td>Hercules</td>\n",
+       "      <td>12.865261</td>\n",
+       "      <td>1481.07850</td>\n",
+       "      <td>2.93</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>16 Cyg B b</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>565.73385</td>\n",
+       "      <td>-25.42624</td>\n",
+       "      <td>25.42624</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1996</td>\n",
+       "      <td>2018-09-06</td>\n",
+       "      <td>...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>Warm</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0.368627</td>\n",
+       "      <td>Cygnus</td>\n",
+       "      <td>Cyg</td>\n",
+       "      <td>Swan</td>\n",
+       "      <td>13.421749</td>\n",
+       "      <td>565.73385</td>\n",
+       "      <td>1.66</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 112 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       P_NAME  P_STATUS      P_MASS  P_MASS_ERROR_MIN  P_MASS_ERROR_MAX  \\\n",
+       "0    11 Com b       3.0  6165.86330        -476.74200         476.74200   \n",
+       "1    11 UMi b       3.0  4684.78480        -794.57001         794.57001   \n",
+       "2    14 And b       3.0  1525.57440               NaN               NaN   \n",
+       "3    14 Her b       3.0  1481.07850         -47.67420          47.67420   \n",
+       "4  16 Cyg B b       3.0   565.73385         -25.42624          25.42624   \n",
+       "\n",
+       "   P_RADIUS  P_RADIUS_ERROR_MIN  P_RADIUS_ERROR_MAX  P_YEAR   P_UPDATED  ...  \\\n",
+       "0       NaN                 NaN                 NaN    2007  2014-05-14  ...   \n",
+       "1       NaN                 NaN                 NaN    2009  2018-09-06  ...   \n",
+       "2       NaN                 NaN                 NaN    2008  2014-05-14  ...   \n",
+       "3       NaN                 NaN                 NaN    2002  2018-09-06  ...   \n",
+       "4       NaN                 NaN                 NaN    1996  2018-09-06  ...   \n",
+       "\n",
+       "   P_HABZONE_CON  P_TYPE_TEMP  P_HABITABLE     P_ESI  S_CONSTELLATION  \\\n",
+       "0              0          Hot            0  0.083813   Coma Berenices   \n",
+       "1              0          Hot            0  0.082414       Ursa Minor   \n",
+       "2              0          Hot            0  0.081917        Andromeda   \n",
+       "3              0         Cold            0  0.145241         Hercules   \n",
+       "4              1         Warm            0  0.368627           Cygnus   \n",
+       "\n",
+       "   S_CONSTELLATION_ABR  S_CONSTELLATION_ENG  P_RADIUS_EST  P_MASS_EST  \\\n",
+       "0                  Com      Berenice's Hair     12.082709  6165.86330   \n",
+       "1                  UMi          Little Bear     12.229641  4684.78480   \n",
+       "2                  And            Andromeda     12.848516  1525.57440   \n",
+       "3                  Her             Hercules     12.865261  1481.07850   \n",
+       "4                  Cyg                 Swan     13.421749   565.73385   \n",
+       "\n",
+       "   P_SEMI_MAJOR_AXIS_EST  \n",
+       "0                   1.29  \n",
+       "1                   1.53  \n",
+       "2                   0.83  \n",
+       "3                   2.93  \n",
+       "4                   1.66  \n",
+       "\n",
+       "[5 rows x 112 columns]"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Load the full dataset; the path is relative to the notebook's working directory.\n",
+    "df = pd.read_csv(filepath_or_buffer='../../datasets/full_data.csv')\n",
+    "df.head(n=5)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e7d0eb63",
+   "metadata": {},
+   "source": [
+    " ",
+    " "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "bdeeb104",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "P_ATMOSPHERE                    100.000000\n",
+       "P_ALT_NAMES                     100.000000\n",
+       "P_DETECTION_RADIUS              100.000000\n",
+       "P_GEO_ALBEDO                    100.000000\n",
+       "P_DETECTION_MASS                100.000000\n",
+       "S_MAGNETIC_FIELD                100.000000\n",
+       "S_DISC                          100.000000\n",
+       "P_TEMP_MEASURED                  99.876482\n",
+       "P_GEO_ALBEDO_ERROR_MIN           99.876482\n",
+       "P_GEO_ALBEDO_ERROR_MAX           99.876482\n",
+       "P_TPERI_ERROR_MAX                88.339921\n",
+       "P_TPERI_ERROR_MIN                88.339921\n",
+       "P_TPERI                          88.117589\n",
+       "P_OMEGA_ERROR_MIN                82.880435\n",
+       "P_OMEGA_ERROR_MAX                82.880435\n",
+       "P_ESCAPE                         82.559289\n",
+       "P_POTENTIAL                      82.559289\n",
+       "P_DENSITY                        82.559289\n",
+       "P_GRAVITY                        82.559289\n",
+       "P_OMEGA                          81.571146\n",
+       "P_INCLINATION_ERROR_MAX          79.990119\n",
+       "P_INCLINATION_ERROR_MIN          79.940711\n",
+       "P_INCLINATION                    79.150198\n",
+       "P_ECCENTRICITY_ERROR_MIN         76.012846\n",
+       "P_ECCENTRICITY_ERROR_MAX         76.012846\n",
+       "S_TYPE                           66.156126\n",
+       "P_ECCENTRICITY                   65.909091\n",
+       "P_IMPACT_PARAMETER_ERROR_MIN     65.242095\n",
+       "P_IMPACT_PARAMETER_ERROR_MAX     65.242095\n",
+       "P_IMPACT_PARAMETER               65.192688\n",
+       "dtype: float64"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Percentage (0-100) of missing values in each column.\n",
+    "missing_pct = df.isna().mean().mul(100)\n",
+    "# Show the 30 columns with the largest share of missing values.\n",
+    "missing_pct.sort_values(ascending=False).iloc[:30]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "775d2fe8",
+   "metadata": {},
+   "source": [
+    " ",
+    " "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "277c6c2a",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(4048, 92)"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Keep only the columns whose missing percentage does not exceed the cutoff.\n",
+    "threshold = 80\n",
+    "cols_to_keep = missing_pct.loc[missing_pct.le(threshold)].index\n",
+    "df_filtered = df.loc[:, cols_to_keep]\n",
+    "df_filtered.shape"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0d0f1145",
+   "metadata": {},
+   "source": [
+    " ",
+    " "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "7b9ce26e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(78, 14)"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Split the retained columns by dtype: integer/float columns vs. object/category columns.\n",
+    "num_cols, cat_cols = (\n",
+    "    df_filtered.select_dtypes(include=dtype_group).columns\n",
+    "    for dtype_group in (['int64', 'float64'], ['object', 'category'])\n",
+    ")\n",
+    "(len(num_cols), len(cat_cols))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a6ed4d11",
+   "metadata": {},
+   "source": [
+    " ",
+    " ",
+    " ",
+    " ",
+    "-",
+    "\n",
+    " "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "a70a4bcf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def make_preprocessor(num_imputer, numeric_features=None,\n",
+    "                      categorical_features=None, max_categories=None):\n",
+    "    \"\"\"Build an unfitted ColumnTransformer for a mixed numeric/categorical frame.\n",
+    "\n",
+    "    Numeric columns are imputed with ``num_imputer`` and then standardized;\n",
+    "    categorical columns are imputed with their most frequent value and then\n",
+    "    one-hot encoded into a dense array.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    num_imputer : estimator\n",
+    "        Imputer used as the first step of the numeric pipeline.\n",
+    "    numeric_features, categorical_features : list-like of column names, optional\n",
+    "        Columns routed to the numeric / categorical pipeline. When omitted,\n",
+    "        the notebook-level ``num_cols`` / ``cat_cols`` are used.\n",
+    "    max_categories : int, optional\n",
+    "        Forwarded to ``OneHotEncoder`` to limit the number of output columns\n",
+    "        per categorical feature. ``None`` (the default) applies no limit.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    ColumnTransformer\n",
+    "        Transformer with a 'num' branch and a 'cat' branch.\n",
+    "    \"\"\"\n",
+    "    # Fall back to the notebook globals so existing one-argument calls keep working.\n",
+    "    if numeric_features is None:\n",
+    "        numeric_features = num_cols\n",
+    "    if categorical_features is None:\n",
+    "        categorical_features = cat_cols\n",
+    "\n",
+    "    num_pipeline = Pipeline([\n",
+    "        ('imputer', num_imputer),\n",
+    "        ('scaler', StandardScaler())\n",
+    "    ])\n",
+    "\n",
+    "    cat_pipeline = Pipeline([\n",
+    "        ('imputer', SimpleImputer(strategy='most_frequent')),\n",
+    "        # Dense output grows with the number of distinct categories;\n",
+    "        # pass max_categories to bound the width when columns are high-cardinality.\n",
+    "        ('encoder', OneHotEncoder(handle_unknown='ignore', sparse_output=False,\n",
+    "                                  max_categories=max_categories))\n",
+    "    ])\n",
+    "\n",
+    "    return ColumnTransformer([\n",
+    "        ('num', num_pipeline, numeric_features),\n",
+    "        ('cat', cat_pipeline, categorical_features)\n",
+    "    ])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "4a511168",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['mean', 'median', 'knn']"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# The two SimpleImputer entries differ only in their strategy name.\n",
+    "imputation_strategies = {\n",
+    "    strategy_name: SimpleImputer(strategy=strategy_name, add_indicator=True)\n",
+    "    for strategy_name in ('mean', 'median')\n",
+    "}\n",
+    "# Distance-weighted KNN imputation over the 5 nearest neighbours.\n",
+    "imputation_strategies['knn'] = KNNImputer(n_neighbors=5, weights='distance')\n",
+    "\n",
+    "list(imputation_strategies)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f7ded51c",
+   "metadata": {},
+   "source": [
+    " ",
+    " ",
+    " ",
+    " ",
+    "\n",
+    " "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bd122941",
+   "metadata": {},
+   "source": [
+    " ",
+    " ",
+    "\n",
+    " ",
+    " ",
+    " ",
+    "\n",
+    " "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "373c3d60",
+   "metadata": {},
+   "source": [
+    " ",
+    " "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "01fc282c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(4048, 14424)"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Build the preprocessor around the KNN imputer registered in\n",
+    "# `imputation_strategies`, so its settings are defined in one place only.\n",
+    "# (ColumnTransformer and Pipeline are already imported in the first code cell.)\n",
+    "knn_preprocessor = make_preprocessor(imputation_strategies['knn'])\n",
+    "\n",
+    "# Apply preprocessing (fit on the full filtered dataset for demonstration)\n",
+    "X_clean_knn = knn_preprocessor.fit_transform(df_filtered)\n",
+    "\n",
+    "X_clean_knn.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "b873869a",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>0</th>\n",
+       "      <th>1</th>\n",
+       "      <th>2</th>\n",
+       "      <th>3</th>\n",
+       "      <th>4</th>\n",
+       "      <th>5</th>\n",
+       "      <th>6</th>\n",
+       "      <th>7</th>\n",
+       "      <th>8</th>\n",
+       "      <th>9</th>\n",
+       "      <th>...</th>\n",
+       "      <th>14414</th>\n",
+       "      <th>14415</th>\n",
+       "      <th>14416</th>\n",
+       "      <th>14417</th>\n",
+       "      <th>14418</th>\n",
+       "      <th>14419</th>\n",
+       "      <th>14420</th>\n",
+       "      <th>14421</th>\n",
+       "      <th>14422</th>\n",
+       "      <th>14423</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>5.377444</td>\n",
+       "      <td>-0.808152</td>\n",
+       "      <td>0.562088</td>\n",
+       "      <td>1.703192</td>\n",
+       "      <td>-1.413107</td>\n",
+       "      <td>1.028977</td>\n",
+       "      <td>-1.947139</td>\n",
+       "      <td>-0.022788</td>\n",
+       "      <td>0.025305</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>3.949894</td>\n",
+       "      <td>-1.470807</td>\n",
+       "      <td>1.044633</td>\n",
+       "      <td>0.041843</td>\n",
+       "      <td>-0.562006</td>\n",
+       "      <td>0.367639</td>\n",
+       "      <td>-1.407238</td>\n",
+       "      <td>-0.021202</td>\n",
+       "      <td>0.025257</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.904860</td>\n",
+       "      <td>-0.146014</td>\n",
+       "      <td>0.079920</td>\n",
+       "      <td>1.521368</td>\n",
+       "      <td>0.094414</td>\n",
+       "      <td>-0.116902</td>\n",
+       "      <td>-1.677188</td>\n",
+       "      <td>-0.023956</td>\n",
+       "      <td>0.025306</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.861972</td>\n",
+       "      <td>0.086431</td>\n",
+       "      <td>-0.089346</td>\n",
+       "      <td>0.952879</td>\n",
+       "      <td>0.192663</td>\n",
+       "      <td>-0.204938</td>\n",
+       "      <td>-3.296892</td>\n",
+       "      <td>-0.010724</td>\n",
+       "      <td>0.025269</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>-0.020291</td>\n",
+       "      <td>0.132817</td>\n",
+       "      <td>-0.123124</td>\n",
+       "      <td>0.489672</td>\n",
+       "      <td>0.194241</td>\n",
+       "      <td>-0.206154</td>\n",
+       "      <td>-4.916596</td>\n",
+       "      <td>-0.018849</td>\n",
+       "      <td>0.025294</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>5 rows × 14424 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   0         1         2         3         4         5         6      \\\n",
+       "0    0.0  5.377444 -0.808152  0.562088  1.703192 -1.413107  1.028977   \n",
+       "1    0.0  3.949894 -1.470807  1.044633  0.041843 -0.562006  0.367639   \n",
+       "2    0.0  0.904860 -0.146014  0.079920  1.521368  0.094414 -0.116902   \n",
+       "3    0.0  0.861972  0.086431 -0.089346  0.952879  0.192663 -0.204938   \n",
+       "4    0.0 -0.020291  0.132817 -0.123124  0.489672  0.194241 -0.206154   \n",
+       "\n",
+       "      7         8         9      ...  14414  14415  14416  14417  14418  \\\n",
+       "0 -1.947139 -0.022788  0.025305  ...    0.0    0.0    0.0    0.0    0.0   \n",
+       "1 -1.407238 -0.021202  0.025257  ...    0.0    0.0    0.0    0.0    0.0   \n",
+       "2 -1.677188 -0.023956  0.025306  ...    0.0    0.0    0.0    0.0    0.0   \n",
+       "3 -3.296892 -0.010724  0.025269  ...    0.0    0.0    0.0    0.0    0.0   \n",
+       "4 -4.916596 -0.018849  0.025294  ...    0.0    0.0    0.0    0.0    0.0   \n",
+       "\n",
+       "   14419  14420  14421  14422  14423  \n",
+       "0    0.0    0.0    0.0    0.0    0.0  \n",
+       "1    0.0    0.0    0.0    0.0    0.0  \n",
+       "2    0.0    0.0    0.0    0.0    0.0  \n",
+       "3    0.0    0.0    0.0    0.0    0.0  \n",
+       "4    0.0    0.0    0.0    0.0    0.0  \n",
+       "\n",
+       "[5 rows x 14424 columns]"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Wrap the transformed matrix in a DataFrame so it can be inspected with pandas.\n",
+    "# Columns keep their default integer labels; feature names are not attached.\n",
+    "df_clean_knn = pd.DataFrame(data=X_clean_knn)\n",
+    "\n",
+    "df_clean_knn.head(n=5)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "791cac62",
+   "metadata": {},
+   "source": [
+    "## After KNN Imputation\n",
+    " "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "59bf9f01",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0    0.0\n",
+       "1    0.0\n",
+       "2    0.0\n",
+       "3    0.0\n",
+       "4    0.0\n",
+       "dtype: float64"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Fraction of values still missing in each column after imputation, largest first.\n",
+    "remaining_missing = df_clean_knn.isna().mean()\n",
+    "remaining_missing.sort_values(ascending=False).head(n=5)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

	P_NAME	P_STATUS	P_MASS	P_MASS_ERROR_MIN	P_MASS_ERROR_MAX	P_RADIUS	P_RADIUS_ERROR_MIN	P_RADIUS_ERROR_MAX	P_YEAR	P_UPDATED	...	P_HABZONE_CON	P_TYPE_TEMP	P_ESI	S_CONSTELLATION	S_CONSTELLATION_ABR	S_CONSTELLATION_ENG	P_RADIUS_EST	P_MASS_EST	P_SEMI_MAJOR_AXIS_EST
0	11 Com b	3.0	6165.86330	-476.74200	476.74200	NaN	NaN	NaN	2007	2014-05-14	...	0	Hot	0.083813	Coma Berenices	Com	Berenice's Hair	12.082709	6165.86330	1.29
1	11 UMi b	3.0	4684.78480	-794.57001	794.57001	NaN	NaN	NaN	2009	2018-09-06	...	0	Hot	0.082414	Ursa Minor	UMi	Little Bear	12.229641	4684.78480	1.53
2	14 And b	3.0	1525.57440	NaN	NaN	NaN	NaN	NaN	2008	2014-05-14	...	0	Hot	0.081917	Andromeda	And	Andromeda	12.848516	1525.57440	0.83
3	14 Her b	3.0	1481.07850	-47.67420	47.67420	NaN	NaN	NaN	2002	2018-09-06	...	0	Cold	0.145241	Hercules	Her	Hercules	12.865261	1481.07850	2.93
4	16 Cyg B b	3.0	565.73385	-25.42624	25.42624	NaN	NaN	NaN	1996	2018-09-06	...	1	Warm	0.368627	Cygnus	Cyg	Swan	13.421749	565.73385	1.66
	1	2	3	4	5	6	7	8	9	...
0	5.377444	-0.808152	0.562088	1.703192	-1.413107	1.028977	-1.947139	-0.022788	0.025305	...
1	3.949894	-1.470807	1.044633	0.041843	-0.562006	0.367639	-1.407238	-0.021202	0.025257	...
2	0.904860	-0.146014	0.079920	1.521368	0.094414	-0.116902	-1.677188	-0.023956	0.025306	...
3	0.861972	0.086431	-0.089346	0.952879	0.192663	-0.204938	-3.296892	-0.010724	0.025269	...
4	-0.020291	0.132817	-0.123124	0.489672	0.194241	-0.206154	-4.916596	-0.018849	0.025294	...