You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
"# handling categorical data using dummies from pandas\r\n",
8
+
"\r\n",
9
+
"import pandas as pd"
10
+
],
11
+
"outputs": [],
12
+
"metadata": {}
13
+
},
14
+
{
15
+
"cell_type": "code",
16
+
"execution_count": 3,
17
+
"source": [
18
+
"data = pd.read_csv(\"E:\\github\\DS-ScriptsNook\\Machine Learning\\Libraries\\Data Preprocessing in ML\\Dataset\\place_area_price.csv\") #load the data\r\n",
19
+
"print(data)"
20
+
],
21
+
"outputs": [
22
+
{
23
+
"output_type": "stream",
24
+
"name": "stdout",
25
+
"text": [
26
+
" place area price\n",
27
+
"0 Lonavala 2600 550000\n",
28
+
"1 Lonavala 3000 565000\n",
29
+
"2 Lonavala 3200 610000\n",
30
+
"3 Karjat 2600 450000\n",
31
+
"4 Karjat 2800 500000\n",
32
+
"5 Khandala 2600 600000\n",
33
+
"6 Khandala 2900 650000\n",
34
+
"7 Khandala 3200 680000\n"
35
+
]
36
+
}
37
+
],
38
+
"metadata": {}
39
+
},
40
+
{
41
+
"cell_type": "code",
42
+
"execution_count": 4,
43
+
"source": [
44
+
"dummies = pd.get_dummies(data.place) # get_dummies is a pandas function that converts categorical values into numerical indicator columns (one per category)\r\n",
45
+
"print(dummies)\r\n",
46
+
"\r\n",
47
+
"# the data is converted to numerical values but here we can see that if one col is deleted,\r\n",
48
+
"# we can still infer its value from the other cols\r\n",
49
+
"# eg1: if we drop the first col, and its value would have been 1, then the other 2 cols would have values 0\r\n",
50
+
"# eg2: if lonavala has 1, then we can say that khandala and karjat would be 0, and hence it is ok to\r\n",
51
+
"# drop the first col, which makes it easier for the model to compute. This is shown in the eg below"
0 commit comments