diff --git a/Machine Learning/Libraries/Data Preprocessing in ML/Dataset/data.csv b/Machine Learning/Libraries/Data Preprocessing in ML/Dataset/data.csv new file mode 100644 index 00000000..8f00ee5e --- /dev/null +++ b/Machine Learning/Libraries/Data Preprocessing in ML/Dataset/data.csv @@ -0,0 +1,8 @@ +Name,Age,Position,Experience,Salary +Nitin,24,ML Engg,4,22000 +Harshita,23,ML Engg,3,18000 +Arya,,Data Analyst,, +Gagan,,Data Analyst,2,10000 +Randhir,,,4,16000 +Saurav,22,admin,3,8000 +Manoj,31,Manager,11,25000 diff --git a/Machine Learning/Libraries/Data Preprocessing in ML/Dataset/place_area_price.csv b/Machine Learning/Libraries/Data Preprocessing in ML/Dataset/place_area_price.csv new file mode 100644 index 00000000..ea161858 --- /dev/null +++ b/Machine Learning/Libraries/Data Preprocessing in ML/Dataset/place_area_price.csv @@ -0,0 +1,9 @@ +place,area,price +Lonavala,2600,550000 +Lonavala,3000,565000 +Lonavala,3200,610000 +Karjat,2600,450000 +Karjat,2800,500000 +Khandala,2600,600000 +Khandala,2900,650000 +Khandala,3200,680000 diff --git a/Machine Learning/Libraries/Data Preprocessing in ML/Handling_categorical_data.ipynb b/Machine Learning/Libraries/Data Preprocessing in ML/Handling_categorical_data.ipynb new file mode 100644 index 00000000..798997e4 --- /dev/null +++ b/Machine Learning/Libraries/Data Preprocessing in ML/Handling_categorical_data.ipynb @@ -0,0 +1,123 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "source": [ + "# handling categorical data using dummies from pandas\r\n", + "\r\n", + "import pandas as pd" + ], + "outputs": [], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 3, + "source": [ + "data = pd.read_csv(\"E:\\github\\DS-ScriptsNook\\Machine Learning\\Libraries\\Data Preprocessing in ML\\Dataset\\place_area_price.csv\") #load the data\r\n", + "print(data)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " place area price\n", + "0 Lonavala 2600 550000\n", 
+ "1 Lonavala 3000 565000\n", + "2 Lonavala 3200 610000\n", + "3 Karjat 2600 450000\n", + "4 Karjat 2800 500000\n", + "5 Khandala 2600 600000\n", + "6 Khandala 2900 650000\n", + "7 Khandala 3200 680000\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "source": [ + "dummies = pd.get_dummies(data.place) # dummies is a function in pandas to convert categorical values into numerical ones\r\n", + "print(dummies)\r\n", + "\r\n", + "# the data is converted to numerical values but here we can see that if one col is deleted,\r\n", + "# we can predict the value for other cols\r\n", + "# eg1: if we drop the first row, and if it has to be 1 then the other 2 cols would have values 0\r\n", + "# eg2: if lonavala has 1, then we can say that khandala and karjat would be 0 and hence it is ok to,\r\n", + "# drop the first col which would become easy for the model to compute. This is shown in the eg below" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Karjat Khandala Lonavala\n", + "0 0 0 1\n", + "1 0 0 1\n", + "2 0 0 1\n", + "3 1 0 0\n", + "4 1 0 0\n", + "5 0 1 0\n", + "6 0 1 0\n", + "7 0 1 0\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "source": [ + "new_dummies = pd.get_dummies(data.place, drop_first=True)\r\n", + "print(new_dummies)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Khandala Lonavala\n", + "0 0 1\n", + "1 0 1\n", + "2 0 1\n", + "3 0 0\n", + "4 0 0\n", + "5 1 0\n", + "6 1 0\n", + "7 1 0\n" + ] + } + ], + "metadata": {} + } + ], + "metadata": { + "orig_nbformat": 4, + "language_info": { + "name": "python", + "version": "3.6.4", + "mimetype": "text/x-python", + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "pygments_lexer": "ipython3", + "nbconvert_exporter": "python", + "file_extension": ".py" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.6.4 64-bit" + }, + 
"interpreter": { + "hash": "f2db1a205d05422567bfea71378eb1163d4d5d7418f0062693a7c6bfcbaf4348" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/Machine Learning/Libraries/Data Preprocessing in ML/Handling_numerical_data1.ipynb b/Machine Learning/Libraries/Data Preprocessing in ML/Handling_numerical_data1.ipynb new file mode 100644 index 00000000..39006155 --- /dev/null +++ b/Machine Learning/Libraries/Data Preprocessing in ML/Handling_numerical_data1.ipynb @@ -0,0 +1,269 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "source": [ + "# check for null values\r\n", + "\r\n", + "import pandas as pd\r\n", + "data = pd.read_csv(\"E:\\github\\DS-ScriptsNook\\Machine Learning\\Libraries\\Data Preprocessing in ML\\Dataset\\data.csv\")\r\n", + "print(data)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Name Age Position Experience Salary\n", + "0 Nitin 24.0 ML Engg 4.0 22000.0\n", + "1 Harshita 23.0 ML Engg 3.0 18000.0\n", + "2 Arya NaN Data Analyst NaN NaN\n", + "3 Gagan NaN Data Analyst 2.0 10000.0\n", + "4 Randhir NaN NaN 4.0 16000.0\n", + "5 Saurav 22.0 admin 3.0 8000.0\n", + "6 Manoj 31.0 Manager 11.0 25000.0\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 2, + "source": [ + "# check for null data\r\n", + "res = data.isnull().sum()\r\n", + "print(res)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Name 0\n", + "Age 3\n", + "Position 1\n", + "Experience 1\n", + "Salary 1\n", + "dtype: int64\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 3, + "source": [ + "# find out all whose age is null\r\n", + "d1 = data[data.Age.isnull()]\r\n", + "print(d1)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Name Age Position Experience Salary\n", + "2 Arya NaN Data Analyst NaN NaN\n", + "3 Gagan NaN Data Analyst 2.0 10000.0\n", 
+ "4 Randhir NaN NaN 4.0 16000.0\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 4, + "source": [ + "# if any value is null\r\n", + "d3 = data[data.isnull().any(axis=1)]\r\n", + "print(d3)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Name Age Position Experience Salary\n", + "2 Arya NaN Data Analyst NaN NaN\n", + "3 Gagan NaN Data Analyst 2.0 10000.0\n", + "4 Randhir NaN NaN 4.0 16000.0\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "### How to drop ?" + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 5, + "source": [ + "# drops if any null values are present\r\n", + "d1 = data.dropna(how=\"any\", axis=0)\r\n", + "print(d1)\r\n", + "\r\n", + "# arya, gagan, randhir got dropped" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Name Age Position Experience Salary\n", + "0 Nitin 24.0 ML Engg 4.0 22000.0\n", + "1 Harshita 23.0 ML Engg 3.0 18000.0\n", + "5 Saurav 22.0 admin 3.0 8000.0\n", + "6 Manoj 31.0 Manager 11.0 25000.0\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 6, + "source": [ + "# drop whose salary is null\r\n", + "d2 = data.dropna(subset=[\"Salary\"])\r\n", + "print(d2)\r\n", + "\r\n", + "# arya got dropped" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Name Age Position Experience Salary\n", + "0 Nitin 24.0 ML Engg 4.0 22000.0\n", + "1 Harshita 23.0 ML Engg 3.0 18000.0\n", + "3 Gagan NaN Data Analyst 2.0 10000.0\n", + "4 Randhir NaN NaN 4.0 16000.0\n", + "5 Saurav 22.0 admin 3.0 8000.0\n", + "6 Manoj 31.0 Manager 11.0 25000.0\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "markdown", + "source": [ + "### How to fill missing values ?" 
+ ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 7, + "source": [ + "d1 = data.fillna({\"Age\": data[\"Age\"].mean()}) # fillna fills a col with the mean/median values\r\n", + "print(d1)" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Name Age Position Experience Salary\n", + "0 Nitin 24.0 ML Engg 4.0 22000.0\n", + "1 Harshita 23.0 ML Engg 3.0 18000.0\n", + "2 Arya 25.0 Data Analyst NaN NaN\n", + "3 Gagan 25.0 Data Analyst 2.0 10000.0\n", + "4 Randhir 25.0 NaN 4.0 16000.0\n", + "5 Saurav 22.0 admin 3.0 8000.0\n", + "6 Manoj 31.0 Manager 11.0 25000.0\n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 8, + "source": [ + "# fill missing values in Experience\r\n", + "d2 = data.fillna({\"Experience\": data[\"Experience\"].mean()})\r\n", + "print(d2)\r\n", + "print(\" \")" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Name Age Position Experience Salary\n", + "0 Nitin 24.0 ML Engg 4.0 22000.0\n", + "1 Harshita 23.0 ML Engg 3.0 18000.0\n", + "2 Arya NaN Data Analyst 4.5 NaN\n", + "3 Gagan NaN Data Analyst 2.0 10000.0\n", + "4 Randhir NaN NaN 4.0 16000.0\n", + "5 Saurav 22.0 admin 3.0 8000.0\n", + "6 Manoj 31.0 Manager 11.0 25000.0\n", + " \n" + ] + } + ], + "metadata": {} + }, + { + "cell_type": "code", + "execution_count": 9, + "source": [ + "# filling null values in Position by a default one\r\n", + "d3 = data.fillna({\"Position\": \"Unallocated\"})\r\n", + "print(d3)\r\n" + ], + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + " Name Age Position Experience Salary\n", + "0 Nitin 24.0 ML Engg 4.0 22000.0\n", + "1 Harshita 23.0 ML Engg 3.0 18000.0\n", + "2 Arya NaN Data Analyst NaN NaN\n", + "3 Gagan NaN Data Analyst 2.0 10000.0\n", + "4 Randhir NaN Unallocated 4.0 16000.0\n", + "5 Saurav 22.0 admin 3.0 8000.0\n", + "6 Manoj 31.0 Manager 11.0 25000.0\n" + ] + } + ], + "metadata": {} + } + ], + 
"metadata": { + "orig_nbformat": 4, + "language_info": { + "name": "python", + "version": "3.6.4", + "mimetype": "text/x-python", + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "pygments_lexer": "ipython3", + "nbconvert_exporter": "python", + "file_extension": ".py" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3.6.4 64-bit" + }, + "interpreter": { + "hash": "f2db1a205d05422567bfea71378eb1163d4d5d7418f0062693a7c6bfcbaf4348" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} \ No newline at end of file diff --git a/Machine Learning/Libraries/Data Preprocessing in ML/Images/img1.PNG b/Machine Learning/Libraries/Data Preprocessing in ML/Images/img1.PNG new file mode 100644 index 00000000..e64c95d2 Binary files /dev/null and b/Machine Learning/Libraries/Data Preprocessing in ML/Images/img1.PNG differ diff --git a/Machine Learning/Libraries/Data Preprocessing in ML/Images/img2.PNG b/Machine Learning/Libraries/Data Preprocessing in ML/Images/img2.PNG new file mode 100644 index 00000000..ef50c48a Binary files /dev/null and b/Machine Learning/Libraries/Data Preprocessing in ML/Images/img2.PNG differ diff --git a/Machine Learning/Libraries/Data Preprocessing in ML/Images/img3.PNG b/Machine Learning/Libraries/Data Preprocessing in ML/Images/img3.PNG new file mode 100644 index 00000000..9bf71552 Binary files /dev/null and b/Machine Learning/Libraries/Data Preprocessing in ML/Images/img3.PNG differ diff --git a/Machine Learning/Libraries/Data Preprocessing in ML/Images/img4.PNG b/Machine Learning/Libraries/Data Preprocessing in ML/Images/img4.PNG new file mode 100644 index 00000000..37b60af2 Binary files /dev/null and b/Machine Learning/Libraries/Data Preprocessing in ML/Images/img4.PNG differ diff --git a/Machine Learning/Libraries/Data Preprocessing in ML/Images/img5.PNG b/Machine Learning/Libraries/Data Preprocessing in ML/Images/img5.PNG new file mode 100644 index 00000000..9e2c0c0b Binary files /dev/null and b/Machine 
Learning/Libraries/Data Preprocessing in ML/Images/img5.PNG differ diff --git a/Machine Learning/Libraries/Data Preprocessing in ML/README.md b/Machine Learning/Libraries/Data Preprocessing in ML/README.md new file mode 100644 index 00000000..1cce6136 --- /dev/null +++ b/Machine Learning/Libraries/Data Preprocessing in ML/README.md @@ -0,0 +1,105 @@ +### Data Pre-processing in ML + +### GOAL +To get a basic understanding of how to pre-process the data before feeding it into the model. + + +### PURPOSE +Our machine learning models just cannot take in the real data as it is, since it may contain inconsistent values, missing values or wrong values. Therefore it is highly important to learn this section before moving further. + + +### DESCRIPTION +- Data pre-processing means to understand the data. +- Data preprocessing is a process of preparing the raw data and making it suitable for a machine learning model. + +It is of 2 types: +- one to handle numerical values +- the other to handle categorical values + + +### WHAT I HAD DONE +1. Check for null values +- In order to proceed, we need to check if our data has any null values or not, +this can be checked with: + +**Syntax:** +data.isnull().sum() +This will give the number of null values in each column + +2. Handling missing values +- If you have only a few missing values and they are not that important, then you can directly drop them, + +**Syntax:** +d1 = data.dropna(how="any", axis=0), it drops rows which have any null values +d2 = data.dropna(subset=[column_name]), drops rows with null values in the column specified + +If you want to fill missing values, then you can do so as follows: +**Syntax:** +d1 = data.fillna({"Age": data["Age"].mean()}), for numerical data +d2 = data.fillna({"Position": "Unallocated"}), for categorical data + +3. 
Handling categorical values +- As we know that our model cannot work with categorical values, we have to convert them into numerical data and this can be done using the pandas library + +**Syntax:** +features = pd.get_dummies(data.place), eg: place is a column name in the data +It will convert the categorical data into 0/1 indicator columns, one per category + +new_features = pd.get_dummies(data.place, drop_first=True) +About **drop_first=True**, I've explained it in detail in Handling_categorical_data.ipynb + + +### WORKFLOW OF YOUR PROJECT FILES +1. Load the data using pandas +2. Understand the data, i.e. whether any null values are present or not. +3. If there are some null values in a numerical column, then they can be dropped or filled with the mean/median +4. If there are some null values in a categorical column, we can allocate a default value to them. +5. Lastly, if the data contains some categorical column, then it should be converted into numerical values using the **get_dummies** function in pandas + + +### STATE YOUR PROCEDURE AND UNDERSTANDING FROM YOUR WORK +- This is a simple approach towards data pre-processing and very easy for beginners to learn, and therefore I chose this methodology. +- With this approach as well the cleaning of the data can be done. +- With this repo, I got to revise my ML skills again + +### USAGE + +Data pre-processing should be the first step for any ML enthusiast since your model cannot take in noisy data, and hence it is highly important to learn this. + +## USE CASES + +This step should be done compulsorily in all cases before developing an ML model. 
+ +### LIBRARIES USED + +- pandas (pip install pandas) + +**ADVANTAGES** +- Data pre-processing helps us in feeding the data into the model without any problems + +**DISADVANTAGES** + +None + +**APPLICATIONS** +- Data preprocessing is used in database-driven applications such as customer relationship management and rule-based applications + +**SCREENSHOTS** + +None +None +None + +### CONCLUSION + +Therefore, before building any ML model, one must take care of the data preprocessing step because real-world data is often incomplete, inconsistent, and/or lacking in certain behaviors or trends, and is likely to contain many errors. + +### REFERENCES +https://scikit-learn.org/stable/modules/preprocessing.html +https://towardsdatascience.com/preprocessing-with-sklearn-a-complete-and-comprehensive-guide-670cb98fcfb9 +https://www.javatpoint.com/data-preprocessing-machine-learning + +### YOUR NAME +Karakattil Dilrose Reji + 