prathimacode-hub
diff --git a/Diff for: ‎Machine Learning/Libraries/Data Preprocessing in ML/Dataset/data.csv
+8 b/Diff for: ‎Machine Learning/Libraries/Data Preprocessing in ML/Dataset/data.csv
+8
diff --git a/Diff for: ‎Machine Learning/Libraries/Data Preprocessing in ML/Dataset/place_area_price.csv
+9 b/Diff for: ‎Machine Learning/Libraries/Data Preprocessing in ML/Dataset/place_area_price.csv
+9
diff --git a/Diff for: ‎Machine Learning/Libraries/Data Preprocessing in ML/Handling_categorical_data.ipynb
+123 b/Diff for: ‎Machine Learning/Libraries/Data Preprocessing in ML/Handling_categorical_data.ipynb
+123
@@ -0,0 +1,8 @@
+Name,Age,Position,Experience,Salary
+Nitin,24,ML Engg,4,22000
+Harshita,23,ML Engg,3,18000
+Arya,,Data Analyst,,
+Gagan,,Data Analyst,2,10000
+Randhir,,,4,16000
+Saurav,22,admin,3,8000
+Manoj,31,Manager,11,25000
@@ -0,0 +1,9 @@
+place,area,price
+Lonavala,2600,550000
+Lonavala,3000,565000
+Lonavala,3200,610000
+Karjat,2600,450000
+Karjat,2800,500000
+Khandala,2600,600000
+Khandala,2900,650000
+Khandala,3200,680000
@@ -0,0 +1,123 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "source": [
+    "# handling categorical data using dummies from pandas\r\n",
+    "\r\n",
+    "import pandas as pd"
+   ],
+   "outputs": [],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "source": [
+    "data = pd.read_csv(\"Dataset/place_area_price.csv\")  # load the data; path is relative to this notebook's folder so it runs on any machine\r\n",
+    "print(data)"
+   ],
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "      place  area   price\n",
+      "0  Lonavala  2600  550000\n",
+      "1  Lonavala  3000  565000\n",
+      "2  Lonavala  3200  610000\n",
+      "3    Karjat  2600  450000\n",
+      "4    Karjat  2800  500000\n",
+      "5  Khandala  2600  600000\n",
+      "6  Khandala  2900  650000\n",
+      "7  Khandala  3200  680000\n"
+     ]
+    }
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "source": [
+    "dummies = pd.get_dummies(data.place)       # get_dummies is a pandas function that converts categorical values into numerical indicator (0/1) columns\r\n",
+    "print(dummies)\r\n",
+    "\r\n",
+    "# the data is converted to numerical values, but the cols are redundant: if one col is deleted,\r\n",
+    "# we can still work out its value from the other cols\r\n",
+    "# eg1: if we drop the first col (Karjat), a row with 0 in both remaining cols must be Karjat\r\n",
+    "# eg2: if Lonavala has 1, then we can say that Khandala and Karjat would be 0, and hence it is ok to\r\n",
+    "# drop the first col, which makes it easier for the model to compute. This is shown in the eg below"
+   ],
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "   Karjat  Khandala  Lonavala\n",
+      "0       0         0         1\n",
+      "1       0         0         1\n",
+      "2       0         0         1\n",
+      "3       1         0         0\n",
+      "4       1         0         0\n",
+      "5       0         1         0\n",
+      "6       0         1         0\n",
+      "7       0         1         0\n"
+     ]
+    }
+   ],
+   "metadata": {}
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "source": [
+    "new_dummies = pd.get_dummies(data.place, drop_first=True)\r\n",
+    "print(new_dummies)"
+   ],
+   "outputs": [
+    {
+     "output_type": "stream",
+     "name": "stdout",
+     "text": [
+      "   Khandala  Lonavala\n",
+      "0         0         1\n",
+      "1         0         1\n",
+      "2         0         1\n",
+      "3         0         0\n",
+      "4         0         0\n",
+      "5         1         0\n",
+      "6         1         0\n",
+      "7         1         0\n"
+     ]
+    }
+   ],
+   "metadata": {}
+  }
+ ],
+ "metadata": {
+  "orig_nbformat": 4,
+  "language_info": {
+   "name": "python",
+   "version": "3.6.4",
+   "mimetype": "text/x-python",
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "pygments_lexer": "ipython3",
+   "nbconvert_exporter": "python",
+   "file_extension": ".py"
+  },
+  "kernelspec": {
+   "name": "python3",
+   "display_name": "Python 3.6.4 64-bit"
+  },
+  "interpreter": {
+   "hash": "f2db1a205d05422567bfea71378eb1163d4d5d7418f0062693a7c6bfcbaf4348"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}