diff --git a/Machine Learning/Libraries/Data Preprocessing in ML/Dataset/data.csv b/Machine Learning/Libraries/Data Preprocessing in ML/Dataset/data.csv
new file mode 100644
index 00000000..8f00ee5e
--- /dev/null
+++ b/Machine Learning/Libraries/Data Preprocessing in ML/Dataset/data.csv
@@ -0,0 +1,8 @@
+Name,Age,Position,Experience,Salary
+Nitin,24,ML Engg,4,22000
+Harshita,23,ML Engg,3,18000
+Arya,,Data Analyst,,
+Gagan,,Data Analyst,2,10000
+Randhir,,,4,16000
+Saurav,22,admin,3,8000
+Manoj,31,Manager,11,25000
diff --git a/Machine Learning/Libraries/Data Preprocessing in ML/Dataset/place_area_price.csv b/Machine Learning/Libraries/Data Preprocessing in ML/Dataset/place_area_price.csv
new file mode 100644
index 00000000..ea161858
--- /dev/null
+++ b/Machine Learning/Libraries/Data Preprocessing in ML/Dataset/place_area_price.csv
@@ -0,0 +1,9 @@
+place,area,price
+Lonavala,2600,550000
+Lonavala,3000,565000
+Lonavala,3200,610000
+Karjat,2600,450000
+Karjat,2800,500000
+Khandala,2600,600000
+Khandala,2900,650000
+Khandala,3200,680000
diff --git a/Machine Learning/Libraries/Data Preprocessing in ML/Handling_categorical_data.ipynb b/Machine Learning/Libraries/Data Preprocessing in ML/Handling_categorical_data.ipynb
new file mode 100644
index 00000000..798997e4
--- /dev/null
+++ b/Machine Learning/Libraries/Data Preprocessing in ML/Handling_categorical_data.ipynb
@@ -0,0 +1,123 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "source": [
+ "# handling categorical data using dummies from pandas\r\n",
+ "\r\n",
+ "import pandas as pd"
+ ],
+ "outputs": [],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "source": [
+ "data = pd.read_csv(\"E:\\github\\DS-ScriptsNook\\Machine Learning\\Libraries\\Data Preprocessing in ML\\Dataset\\place_area_price.csv\") #load the data\r\n",
+ "print(data)"
+ ],
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ " place area price\n",
+ "0 Lonavala 2600 550000\n",
+ "1 Lonavala 3000 565000\n",
+ "2 Lonavala 3200 610000\n",
+ "3 Karjat 2600 450000\n",
+ "4 Karjat 2800 500000\n",
+ "5 Khandala 2600 600000\n",
+ "6 Khandala 2900 650000\n",
+ "7 Khandala 3200 680000\n"
+ ]
+ }
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "source": [
+    "dummies = pd.get_dummies(data.place) # get_dummies is a pandas function that converts categorical values into 0/1 indicator columns\r\n",
+ "print(dummies)\r\n",
+ "\r\n",
+ "# the data is converted to numerical values but here we can see that if one col is deleted,\r\n",
+ "# we can predict the value for other cols\r\n",
+    "# eg1: if we drop the first col, then a row with 0 in the remaining 2 cols implies the dropped category was 1\r\n",
+ "# eg2: if lonavala has 1, then we can say that khandala and karjat would be 0 and hence it is ok to,\r\n",
+ "# drop the first col which would become easy for the model to compute. This is shown in the eg below"
+ ],
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ " Karjat Khandala Lonavala\n",
+ "0 0 0 1\n",
+ "1 0 0 1\n",
+ "2 0 0 1\n",
+ "3 1 0 0\n",
+ "4 1 0 0\n",
+ "5 0 1 0\n",
+ "6 0 1 0\n",
+ "7 0 1 0\n"
+ ]
+ }
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "source": [
+ "new_dummies = pd.get_dummies(data.place, drop_first=True)\r\n",
+ "print(new_dummies)"
+ ],
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ " Khandala Lonavala\n",
+ "0 0 1\n",
+ "1 0 1\n",
+ "2 0 1\n",
+ "3 0 0\n",
+ "4 0 0\n",
+ "5 1 0\n",
+ "6 1 0\n",
+ "7 1 0\n"
+ ]
+ }
+ ],
+ "metadata": {}
+ }
+ ],
+ "metadata": {
+ "orig_nbformat": 4,
+ "language_info": {
+ "name": "python",
+ "version": "3.6.4",
+ "mimetype": "text/x-python",
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "pygments_lexer": "ipython3",
+ "nbconvert_exporter": "python",
+ "file_extension": ".py"
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3.6.4 64-bit"
+ },
+ "interpreter": {
+ "hash": "f2db1a205d05422567bfea71378eb1163d4d5d7418f0062693a7c6bfcbaf4348"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
\ No newline at end of file
diff --git a/Machine Learning/Libraries/Data Preprocessing in ML/Handling_numerical_data1.ipynb b/Machine Learning/Libraries/Data Preprocessing in ML/Handling_numerical_data1.ipynb
new file mode 100644
index 00000000..39006155
--- /dev/null
+++ b/Machine Learning/Libraries/Data Preprocessing in ML/Handling_numerical_data1.ipynb
@@ -0,0 +1,269 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "source": [
+ "# check for null values\r\n",
+ "\r\n",
+ "import pandas as pd\r\n",
+ "data = pd.read_csv(\"E:\\github\\DS-ScriptsNook\\Machine Learning\\Libraries\\Data Preprocessing in ML\\Dataset\\data.csv\")\r\n",
+ "print(data)"
+ ],
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ " Name Age Position Experience Salary\n",
+ "0 Nitin 24.0 ML Engg 4.0 22000.0\n",
+ "1 Harshita 23.0 ML Engg 3.0 18000.0\n",
+ "2 Arya NaN Data Analyst NaN NaN\n",
+ "3 Gagan NaN Data Analyst 2.0 10000.0\n",
+ "4 Randhir NaN NaN 4.0 16000.0\n",
+ "5 Saurav 22.0 admin 3.0 8000.0\n",
+ "6 Manoj 31.0 Manager 11.0 25000.0\n"
+ ]
+ }
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "source": [
+ "# check for null data\r\n",
+ "res = data.isnull().sum()\r\n",
+ "print(res)"
+ ],
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Name 0\n",
+ "Age 3\n",
+ "Position 1\n",
+ "Experience 1\n",
+ "Salary 1\n",
+ "dtype: int64\n"
+ ]
+ }
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "source": [
+ "# find out all whose age is null\r\n",
+ "d1 = data[data.Age.isnull()]\r\n",
+ "print(d1)"
+ ],
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ " Name Age Position Experience Salary\n",
+ "2 Arya NaN Data Analyst NaN NaN\n",
+ "3 Gagan NaN Data Analyst 2.0 10000.0\n",
+ "4 Randhir NaN NaN 4.0 16000.0\n"
+ ]
+ }
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "source": [
+ "# if any value is null\r\n",
+ "d3 = data[data.isnull().any(axis=1)]\r\n",
+ "print(d3)"
+ ],
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ " Name Age Position Experience Salary\n",
+ "2 Arya NaN Data Analyst NaN NaN\n",
+ "3 Gagan NaN Data Analyst 2.0 10000.0\n",
+ "4 Randhir NaN NaN 4.0 16000.0\n"
+ ]
+ }
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "### How to drop ?"
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "source": [
+ "# drops if any null values are present\r\n",
+ "d1 = data.dropna(how=\"any\", axis=0)\r\n",
+ "print(d1)\r\n",
+ "\r\n",
+ "# arya, gagan, randhir got dropped"
+ ],
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ " Name Age Position Experience Salary\n",
+ "0 Nitin 24.0 ML Engg 4.0 22000.0\n",
+ "1 Harshita 23.0 ML Engg 3.0 18000.0\n",
+ "5 Saurav 22.0 admin 3.0 8000.0\n",
+ "6 Manoj 31.0 Manager 11.0 25000.0\n"
+ ]
+ }
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "source": [
+ "# drop whose salary is null\r\n",
+ "d2 = data.dropna(subset=[\"Salary\"])\r\n",
+ "print(d2)\r\n",
+ "\r\n",
+ "# arya got dropped"
+ ],
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ " Name Age Position Experience Salary\n",
+ "0 Nitin 24.0 ML Engg 4.0 22000.0\n",
+ "1 Harshita 23.0 ML Engg 3.0 18000.0\n",
+ "3 Gagan NaN Data Analyst 2.0 10000.0\n",
+ "4 Randhir NaN NaN 4.0 16000.0\n",
+ "5 Saurav 22.0 admin 3.0 8000.0\n",
+ "6 Manoj 31.0 Manager 11.0 25000.0\n"
+ ]
+ }
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "### How to fill missing values ?"
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "source": [
+    "d1 = data.fillna({\"Age\": data[\"Age\"].mean()}) # fillna replaces missing values in a col, here with the col's mean\r\n",
+ "print(d1)"
+ ],
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ " Name Age Position Experience Salary\n",
+ "0 Nitin 24.0 ML Engg 4.0 22000.0\n",
+ "1 Harshita 23.0 ML Engg 3.0 18000.0\n",
+ "2 Arya 25.0 Data Analyst NaN NaN\n",
+ "3 Gagan 25.0 Data Analyst 2.0 10000.0\n",
+ "4 Randhir 25.0 NaN 4.0 16000.0\n",
+ "5 Saurav 22.0 admin 3.0 8000.0\n",
+ "6 Manoj 31.0 Manager 11.0 25000.0\n"
+ ]
+ }
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "source": [
+ "# fill missing values in Experience\r\n",
+ "d2 = data.fillna({\"Experience\": data[\"Experience\"].mean()})\r\n",
+ "print(d2)\r\n",
+ "print(\" \")"
+ ],
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ " Name Age Position Experience Salary\n",
+ "0 Nitin 24.0 ML Engg 4.0 22000.0\n",
+ "1 Harshita 23.0 ML Engg 3.0 18000.0\n",
+ "2 Arya NaN Data Analyst 4.5 NaN\n",
+ "3 Gagan NaN Data Analyst 2.0 10000.0\n",
+ "4 Randhir NaN NaN 4.0 16000.0\n",
+ "5 Saurav 22.0 admin 3.0 8000.0\n",
+ "6 Manoj 31.0 Manager 11.0 25000.0\n",
+ " \n"
+ ]
+ }
+ ],
+ "metadata": {}
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "source": [
+ "# filling null values in Position by a default one\r\n",
+ "d3 = data.fillna({\"Position\": \"Unallocated\"})\r\n",
+ "print(d3)\r\n"
+ ],
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ " Name Age Position Experience Salary\n",
+ "0 Nitin 24.0 ML Engg 4.0 22000.0\n",
+ "1 Harshita 23.0 ML Engg 3.0 18000.0\n",
+ "2 Arya NaN Data Analyst NaN NaN\n",
+ "3 Gagan NaN Data Analyst 2.0 10000.0\n",
+ "4 Randhir NaN Unallocated 4.0 16000.0\n",
+ "5 Saurav 22.0 admin 3.0 8000.0\n",
+ "6 Manoj 31.0 Manager 11.0 25000.0\n"
+ ]
+ }
+ ],
+ "metadata": {}
+ }
+ ],
+ "metadata": {
+ "orig_nbformat": 4,
+ "language_info": {
+ "name": "python",
+ "version": "3.6.4",
+ "mimetype": "text/x-python",
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "pygments_lexer": "ipython3",
+ "nbconvert_exporter": "python",
+ "file_extension": ".py"
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3.6.4 64-bit"
+ },
+ "interpreter": {
+ "hash": "f2db1a205d05422567bfea71378eb1163d4d5d7418f0062693a7c6bfcbaf4348"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
\ No newline at end of file
diff --git a/Machine Learning/Libraries/Data Preprocessing in ML/Images/img1.PNG b/Machine Learning/Libraries/Data Preprocessing in ML/Images/img1.PNG
new file mode 100644
index 00000000..e64c95d2
Binary files /dev/null and b/Machine Learning/Libraries/Data Preprocessing in ML/Images/img1.PNG differ
diff --git a/Machine Learning/Libraries/Data Preprocessing in ML/Images/img2.PNG b/Machine Learning/Libraries/Data Preprocessing in ML/Images/img2.PNG
new file mode 100644
index 00000000..ef50c48a
Binary files /dev/null and b/Machine Learning/Libraries/Data Preprocessing in ML/Images/img2.PNG differ
diff --git a/Machine Learning/Libraries/Data Preprocessing in ML/Images/img3.PNG b/Machine Learning/Libraries/Data Preprocessing in ML/Images/img3.PNG
new file mode 100644
index 00000000..9bf71552
Binary files /dev/null and b/Machine Learning/Libraries/Data Preprocessing in ML/Images/img3.PNG differ
diff --git a/Machine Learning/Libraries/Data Preprocessing in ML/Images/img4.PNG b/Machine Learning/Libraries/Data Preprocessing in ML/Images/img4.PNG
new file mode 100644
index 00000000..37b60af2
Binary files /dev/null and b/Machine Learning/Libraries/Data Preprocessing in ML/Images/img4.PNG differ
diff --git a/Machine Learning/Libraries/Data Preprocessing in ML/Images/img5.PNG b/Machine Learning/Libraries/Data Preprocessing in ML/Images/img5.PNG
new file mode 100644
index 00000000..9e2c0c0b
Binary files /dev/null and b/Machine Learning/Libraries/Data Preprocessing in ML/Images/img5.PNG differ
diff --git a/Machine Learning/Libraries/Data Preprocessing in ML/README.md b/Machine Learning/Libraries/Data Preprocessing in ML/README.md
new file mode 100644
index 00000000..1cce6136
--- /dev/null
+++ b/Machine Learning/Libraries/Data Preprocessing in ML/README.md
@@ -0,0 +1,105 @@
+### Data Pre-processing in ML
+
+### GOAL
+To get a basic understanding of how to pre-process the data before giving the data into the model.
+
+
+### PURPOSE
+Our machine learning models just cannot take in the real data as it is, since it may contain inconsistent values, missing values or wrong values. Therefore it is highly important to learn this section before moving further.
+
+
+### DESCRIPTION
+- Data pre-processing means to understand the data.
+- Data preprocessing is a process of preparing the raw data and making it suitable for a machine learning model.
+
+It is of 2 types:
+- one handle numerical values
+- other to handle categorical values
+
+
+### WHAT I HAD DONE
+1. Check for null values
+- In order to proceed, we need to check if our data has any null values or not,
+this can be checked with:
+
+**Syntax:**
+data.isnull().sum()
+This will give the number of null values in each column
+
+2. Handling missing values
+- If you have only a few missing values and they are not that important, then you can directly drop them,
+
+**Syntax:**
+d1 = data.dropna(how="any", axis=0), it drops rows which have any null values
+d2 = data.dropna(subset=[column_name]), drops rows that have null values in the specified column
+
+If you want to fill missing values, then you can do them as following:
+**Syntax:**
+d1 = data.fillna({"Age": data["Age"].mean()}), for numerical data
+d2 = data.fillna({"Position": "Unallocated"}), for categorical data
+
+3. Handling categorical values
+- As we know that our model cannot work with categorical values, we have to convert them into numerical data and this can be done using pandas library
+
+**Syntax:**
+features = pd.get_dummies(data.place) , eg: place is a column name in the data
+It will convert each category into its own 0/1 indicator (one-hot) column
+
+new_features = pd.get_dummies(data.place, drop_first=True)
+About **drop_first=True**, i've explained in detailed in Handling_categorical_data.ipynb
+
+
+### WORKFLOW OF YOUR PROJECT FILES
+1. Load the data using pandas
+2. Understand the data, if any null values are present or not.
+3. If there are some null values in a numerical column, then they can be dropped or filled with the mean/median
+4. If there are some null values in categorical column, we can allocate a default value in them.
+5. Lastly, if the data contains some categorical column, then it should be converted into numerical form using the **get_dummies** function in pandas
+
+
+### STATE YOUR PROCEDURE AND UNDERSTANDING FROM YOUR WORK
+- This is a simple approach towards data pre-processing and very easy for beginners to learn, and therefore I chose this methodology.
+- With this approach as well the cleaning of the data can be done.
+- With this repo, I got to revise the ML Skills again
+
+### USAGE
+
+Data pre-processing should be the first step for any ML enthusiast since your model cannot take in noisy data, and hence it is highly important to learn this.
+
+## USE CASES
+
+This step should be done compulsorily in all cases before developing an ml model.
+
+### LIBRARIES USED
+
+- pandas (pip install pandas)
+
+**ADVANTAGES**
+- Data pre-processing helps us in giving the data into model without any problems
+
+**DISADVANTAGES**
+
+None
+
+**APPLICATIONS**
+- Data preprocessing is used in database-driven applications such as customer relationship management and rule-based applications
+
+**SCREENSHOTS**
+
+
+
+
+
+### CONCLUSION
+
+Therefore, before building any ml model, one must take care of the data preprocessing section because the real-world data is often incomplete, inconsistent, and/or lacking in certain behaviors or trends, and is likely to contain many errors.
+
+### REFERENCES
+https://scikit-learn.org/stable/modules/preprocessing.html
+https://towardsdatascience.com/preprocessing-with-sklearn-a-complete-and-comprehensive-guide-670cb98fcfb9
+https://www.javatpoint.com/data-preprocessing-machine-learning
+
+### YOUR NAME
+Karakattil Dilrose Reji
+
+