|
7 | 7 | "source": [
|
8 | 8 | "# Multicollinearity\n",
|
9 | 9 | "\n",
|
| 10 | + "[Machine Learning Interpretability course](https://www.trainindata.com/p/machine-learning-interpretability)\n", |
| 11 | + "\n", |
10 | 12 | "In this notebook, we will check the impact of collinearity on the model coefficients."
|
11 | 13 | ]
|
12 | 14 | },
|
|
15 | 17 | "execution_count": 1,
|
16 | 18 | "id": "90e85181",
|
17 | 19 | "metadata": {},
|
18 |
| - "outputs": [ |
19 |
| - { |
20 |
| - "name": "stderr", |
21 |
| - "output_type": "stream", |
22 |
| - "text": [ |
23 |
| - "C:\\Users\\Sole\\Documents\\Repositories\\envs\\fsml\\lib\\site-packages\\scipy\\__init__.py:146: UserWarning: A NumPy version >=1.17.3 and <1.25.0 is required for this version of SciPy (detected version 1.25.1\n", |
24 |
| - " warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n" |
25 |
| - ] |
26 |
| - } |
27 |
| - ], |
| 20 | + "outputs": [], |
28 | 21 | "source": [
|
29 | 22 | "import pandas as pd\n",
|
30 | 23 | "import matplotlib.pyplot as plt\n",
|
|
161 | 154 | "Dep. Variable: MedHouseVal R-squared: 0.550\n",
|
162 | 155 | "Model: OLS Adj. R-squared: 0.550\n",
|
163 | 156 | "Method: Least Squares F-statistic: 2942.\n",
|
164 |
| - "Date: Tue, 01 Aug 2023 Prob (F-statistic): 0.00\n", |
165 |
| - "Time: 18:06:41 Log-Likelihood: -16796.\n", |
| 157 | + "Date: Tue, 14 Nov 2023 Prob (F-statistic): 0.00\n", |
| 158 | + "Time: 10:49:32 Log-Likelihood: -16796.\n", |
166 | 159 | "No. Observations: 14448 AIC: 3.361e+04\n",
|
167 | 160 | "Df Residuals: 14441 BIC: 3.366e+04\n",
|
168 | 161 | "Df Model: 6 \n",
|
|
396 | 389 | "Dep. Variable: MedHouseVal R-squared: 0.523\n",
|
397 | 390 | "Model: OLS Adj. R-squared: 0.523\n",
|
398 | 391 | "Method: Least Squares F-statistic: 3168.\n",
|
399 |
| - "Date: Tue, 01 Aug 2023 Prob (F-statistic): 0.00\n", |
400 |
| - "Time: 18:06:42 Log-Likelihood: -17216.\n", |
| 392 | + "Date: Tue, 14 Nov 2023 Prob (F-statistic): 0.00\n", |
| 393 | + "Time: 10:49:33 Log-Likelihood: -17216.\n", |
401 | 394 | "No. Observations: 14448 AIC: 3.444e+04\n",
|
402 | 395 | "Df Residuals: 14442 BIC: 3.449e+04\n",
|
403 | 396 | "Df Model: 5 \n",
|
|
693 | 686 | "Dep. Variable: MedHouseVal R-squared: 0.521\n",
|
694 | 687 | "Model: OLS Adj. R-squared: 0.521\n",
|
695 | 688 | "Method: Least Squares F-statistic: 5246.\n",
|
696 |
| - "Date: Tue, 01 Aug 2023 Prob (F-statistic): 0.00\n", |
697 |
| - "Time: 18:06:43 Log-Likelihood: -17241.\n", |
| 689 | + "Date: Tue, 14 Nov 2023 Prob (F-statistic): 0.00\n", |
| 690 | + "Time: 10:49:35 Log-Likelihood: -17241.\n", |
698 | 691 | "No. Observations: 14448 AIC: 3.449e+04\n",
|
699 | 692 | "Df Residuals: 14444 BIC: 3.452e+04\n",
|
700 | 693 | "Df Model: 3 \n",
|
|
814 | 807 | "id": "6ee1081e",
|
815 | 808 | "metadata": {},
|
816 | 809 | "outputs": [
|
| 810 | + { |
| 811 | + "name": "stderr", |
| 812 | + "output_type": "stream", |
| 813 | + "text": [ |
| 814 | + "C:\\Users\\Sole\\Documents\\Repositories\\envs\\fsml\\lib\\site-packages\\seaborn\\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", |
| 815 | + " if pd.api.types.is_categorical_dtype(vector):\n", |
| 816 | + "C:\\Users\\Sole\\Documents\\Repositories\\envs\\fsml\\lib\\site-packages\\seaborn\\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", |
| 817 | + " if pd.api.types.is_categorical_dtype(vector):\n", |
| 818 | + "C:\\Users\\Sole\\Documents\\Repositories\\envs\\fsml\\lib\\site-packages\\seaborn\\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", |
| 819 | + " if pd.api.types.is_categorical_dtype(vector):\n" |
| 820 | + ] |
| 821 | + }, |
817 | 822 | {
|
818 | 823 | "data": {
|
819 | 824 | "text/plain": [
|
|
853 | 858 | "id": "fc8b100f",
|
854 | 859 | "metadata": {},
|
855 | 860 | "outputs": [
|
| 861 | + { |
| 862 | + "name": "stderr", |
| 863 | + "output_type": "stream", |
| 864 | + "text": [ |
| 865 | + "C:\\Users\\Sole\\Documents\\Repositories\\envs\\fsml\\lib\\site-packages\\seaborn\\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", |
| 866 | + " if pd.api.types.is_categorical_dtype(vector):\n", |
| 867 | + "C:\\Users\\Sole\\Documents\\Repositories\\envs\\fsml\\lib\\site-packages\\seaborn\\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", |
| 868 | + " if pd.api.types.is_categorical_dtype(vector):\n", |
| 869 | + "C:\\Users\\Sole\\Documents\\Repositories\\envs\\fsml\\lib\\site-packages\\seaborn\\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", |
| 870 | + " if pd.api.types.is_categorical_dtype(vector):\n", |
| 871 | + "C:\\Users\\Sole\\Documents\\Repositories\\envs\\fsml\\lib\\site-packages\\seaborn\\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", |
| 872 | + " if pd.api.types.is_categorical_dtype(vector):\n", |
| 873 | + "C:\\Users\\Sole\\Documents\\Repositories\\envs\\fsml\\lib\\site-packages\\seaborn\\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", |
| 874 | + " if pd.api.types.is_categorical_dtype(vector):\n", |
| 875 | + "C:\\Users\\Sole\\Documents\\Repositories\\envs\\fsml\\lib\\site-packages\\seaborn\\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", |
| 876 | + " if pd.api.types.is_categorical_dtype(vector):\n", |
| 877 | + "C:\\Users\\Sole\\Documents\\Repositories\\envs\\fsml\\lib\\site-packages\\seaborn\\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", |
| 878 | + " if pd.api.types.is_categorical_dtype(vector):\n", |
| 879 | + "C:\\Users\\Sole\\Documents\\Repositories\\envs\\fsml\\lib\\site-packages\\seaborn\\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", |
| 880 | + " if pd.api.types.is_categorical_dtype(vector):\n", |
| 881 | + "C:\\Users\\Sole\\Documents\\Repositories\\envs\\fsml\\lib\\site-packages\\seaborn\\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.\n", |
| 882 | + " with pd.option_context('mode.use_inf_as_na', True):\n", |
| 883 | + "C:\\Users\\Sole\\Documents\\Repositories\\envs\\fsml\\lib\\site-packages\\seaborn\\_oldcore.py:1057: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n", |
| 884 | + " grouped_data = data.groupby(\n", |
| 885 | + "C:\\Users\\Sole\\Documents\\Repositories\\envs\\fsml\\lib\\site-packages\\seaborn\\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", |
| 886 | + " if pd.api.types.is_categorical_dtype(vector):\n", |
| 887 | + "C:\\Users\\Sole\\Documents\\Repositories\\envs\\fsml\\lib\\site-packages\\seaborn\\_oldcore.py:1498: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead\n", |
| 888 | + " if pd.api.types.is_categorical_dtype(vector):\n", |
| 889 | + "C:\\Users\\Sole\\Documents\\Repositories\\envs\\fsml\\lib\\site-packages\\seaborn\\_oldcore.py:1119: FutureWarning: use_inf_as_na option is deprecated and will be removed in a future version. Convert inf values to NaN before operating instead.\n", |
| 890 | + " with pd.option_context('mode.use_inf_as_na', True):\n", |
| 891 | + "C:\\Users\\Sole\\Documents\\Repositories\\envs\\fsml\\lib\\site-packages\\seaborn\\_oldcore.py:1057: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n", |
| 892 | + " grouped_data = data.groupby(\n" |
| 893 | + ] |
| 894 | + }, |
856 | 895 | {
|
857 | 896 | "data": {
|
858 | 897 | "text/plain": [
|
859 |
| - "<seaborn.axisgrid.JointGrid at 0x1341f5be9e0>" |
| 898 | + "<seaborn.axisgrid.JointGrid at 0x29750e466b0>" |
860 | 899 | ]
|
861 | 900 | },
|
862 | 901 | "execution_count": 21,
|
|
0 commit comments