From 2478d40711c3c229738be7b9c3487e85ddca5162 Mon Sep 17 00:00:00 2001 From: Ruchika-20 <84443383+Ruchika-20@users.noreply.github.com> Date: Wed, 24 Nov 2021 23:57:18 +0530 Subject: [PATCH 1/6] Create Data Science with Python : RANSAC Algorithm --- .../Tutorials/Data Science with Python : RANSAC Algorithm | 1 + 1 file changed, 1 insertion(+) create mode 100644 Datascience_With_Python/Computer Vision/Tutorials/Data Science with Python : RANSAC Algorithm diff --git a/Datascience_With_Python/Computer Vision/Tutorials/Data Science with Python : RANSAC Algorithm b/Datascience_With_Python/Computer Vision/Tutorials/Data Science with Python : RANSAC Algorithm new file mode 100644 index 0000000000..8b13789179 --- /dev/null +++ b/Datascience_With_Python/Computer Vision/Tutorials/Data Science with Python : RANSAC Algorithm @@ -0,0 +1 @@ + From 7b2ee3753caf2265bbdfe60e9d4a267768020827 Mon Sep 17 00:00:00 2001 From: Ruchika-20 <84443383+Ruchika-20@users.noreply.github.com> Date: Wed, 24 Nov 2021 23:57:39 +0530 Subject: [PATCH 2/6] Delete Data Science with Python : RANSAC Algorithm --- .../Tutorials/Data Science with Python : RANSAC Algorithm | 1 - 1 file changed, 1 deletion(-) delete mode 100644 Datascience_With_Python/Computer Vision/Tutorials/Data Science with Python : RANSAC Algorithm diff --git a/Datascience_With_Python/Computer Vision/Tutorials/Data Science with Python : RANSAC Algorithm b/Datascience_With_Python/Computer Vision/Tutorials/Data Science with Python : RANSAC Algorithm deleted file mode 100644 index 8b13789179..0000000000 --- a/Datascience_With_Python/Computer Vision/Tutorials/Data Science with Python : RANSAC Algorithm +++ /dev/null @@ -1 +0,0 @@ - From a4ecf3c16adbed023b3cd1a039eb041d19718e33 Mon Sep 17 00:00:00 2001 From: Ruchika-20 <84443383+Ruchika-20@users.noreply.github.com> Date: Wed, 24 Nov 2021 23:59:32 +0530 Subject: [PATCH 3/6] Create Data Science with Python : RANSAC Algorithm --- .../Tutorials/Data Science with Python : RANSAC Algorithm | 1 + 1 
file changed, 1 insertion(+) create mode 100644 Datascience_With_Python/Computer Vision/Tutorials/Data Science with Python : RANSAC Algorithm diff --git a/Datascience_With_Python/Computer Vision/Tutorials/Data Science with Python : RANSAC Algorithm b/Datascience_With_Python/Computer Vision/Tutorials/Data Science with Python : RANSAC Algorithm new file mode 100644 index 0000000000..8b13789179 --- /dev/null +++ b/Datascience_With_Python/Computer Vision/Tutorials/Data Science with Python : RANSAC Algorithm @@ -0,0 +1 @@ + From 403d07ec60115763cce62c215a2f34505f524252 Mon Sep 17 00:00:00 2001 From: Ruchika-20 <84443383+Ruchika-20@users.noreply.github.com> Date: Thu, 25 Nov 2021 00:00:09 +0530 Subject: [PATCH 4/6] Delete Data Science with Python : RANSAC Algorithm --- .../Tutorials/Data Science with Python : RANSAC Algorithm | 1 - 1 file changed, 1 deletion(-) delete mode 100644 Datascience_With_Python/Computer Vision/Tutorials/Data Science with Python : RANSAC Algorithm diff --git a/Datascience_With_Python/Computer Vision/Tutorials/Data Science with Python : RANSAC Algorithm b/Datascience_With_Python/Computer Vision/Tutorials/Data Science with Python : RANSAC Algorithm deleted file mode 100644 index 8b13789179..0000000000 --- a/Datascience_With_Python/Computer Vision/Tutorials/Data Science with Python : RANSAC Algorithm +++ /dev/null @@ -1 +0,0 @@ - From def88acdeac288b48f74d222e9156cb1d1cdca30 Mon Sep 17 00:00:00 2001 From: Ruchika-20 <84443383+Ruchika-20@users.noreply.github.com> Date: Sat, 27 Nov 2021 11:19:14 +0530 Subject: [PATCH 5/6] Create README.md --- .../README.md | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 Datascience_With_Python/Computer Vision/Tutorials/Data science with Python: RANSAC Algorithm/README.md diff --git a/Datascience_With_Python/Computer Vision/Tutorials/Data science with Python: RANSAC Algorithm/README.md b/Datascience_With_Python/Computer Vision/Tutorials/Data science with Python: RANSAC Algorithm/README.md 
new file mode 100644 index 0000000000..84c0cb5073 --- /dev/null +++ b/Datascience_With_Python/Computer Vision/Tutorials/Data science with Python: RANSAC Algorithm/README.md @@ -0,0 +1,44 @@ +Random sample consensus (RANSAC) +Definition +Hello everyone! In this tutorial, we are going to have a look at RANSAC. So let's get started! Hope you like it. + +Basic Idea +After reading this tutorial, you will acquire a basic knowledge of RANSAC and its importance in computer vision. + +Goal +To understand the concept of RANSAC in detail. + +Topics to be Covered in this Tutorial: +WHAT IS RANSAC? + +Random sample consensus (RANSAC) is an iterative method to estimate parameters of a mathematical model from a set of observed data that contains outliers, when outliers are to be accorded no influence on the values of the estimates. + +PURPOSE + +This section explains the main purpose of using RANSAC over other algorithms and the basic idea behind it. + +The RANSAC algorithm is a learning technique to estimate parameters of a model by random sampling of observed data. Given a dataset whose data elements contain both inliers and outliers, RANSAC uses a voting scheme to find the optimal fitting result. + +Major Advantages and Disadvantages + +An advantage of RANSAC is its ability to do robust estimation of the model parameters, i.e., it can estimate the parameters with a high degree of accuracy even when a significant number of outliers are present in the data set. + +A disadvantage of RANSAC is that there is no upper bound on the time it takes to compute these parameters. When the number of iterations computed is limited, the solution obtained may not be optimal, and it may not even be one that fits the data in a good way. In this way RANSAC offers a trade-off; by computing a greater number of iterations the probability of a reasonable model being produced is increased. 
+ +Another disadvantage of RANSAC is that it requires the setting of problem-specific thresholds. + +RANSAC can only estimate one model for a particular data set. As for any one-model approach when two (or more) model instances exist, RANSAC may fail to find either one. The Hough transform is an alternative robust estimation technique that may be useful when more than one model instance is present. + +Implementation of algorithm in Python + +This covers a detailed explanation of the RANSAC algorithm using Python on a given image. Feature extraction and feature matching are done for a given image. + +Bibliography + +http://www.cse.yorku.ca/~kosta/CompVis_Notes/ransac.pdf + +https://en.wikipedia.org/wiki/Random_sample_consensus#Advantages_and_disadvantages + +https://scikit-learn.org/stable/auto_examples/linear_model/plot_ransac.html + +https://vitalflux.com/ransac-regression-explained-with-python-examples/ From 292153f039f02fe2d8567a7831768b452ffca8c4 Mon Sep 17 00:00:00 2001 From: Ruchika-20 <84443383+Ruchika-20@users.noreply.github.com> Date: Sat, 27 Nov 2021 11:19:56 +0530 Subject: [PATCH 6/6] Add files via upload --- ... 
with Python RANSAC Algorithm #7355.ipynb | 346 ++++++++++++++++++ 1 file changed, 346 insertions(+) create mode 100644 Datascience_With_Python/Computer Vision/Tutorials/Data science with Python: RANSAC Algorithm/Data Science with Python RANSAC Algorithm #7355.ipynb diff --git a/Datascience_With_Python/Computer Vision/Tutorials/Data science with Python: RANSAC Algorithm/Data Science with Python RANSAC Algorithm #7355.ipynb b/Datascience_With_Python/Computer Vision/Tutorials/Data science with Python: RANSAC Algorithm/Data Science with Python RANSAC Algorithm #7355.ipynb new file mode 100644 index 0000000000..7f35619343 --- /dev/null +++ b/Datascience_With_Python/Computer Vision/Tutorials/Data science with Python: RANSAC Algorithm/Data Science with Python RANSAC Algorithm #7355.ipynb @@ -0,0 +1,346 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Untitled9.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "Hus89IFgfXW1" + }, + "source": [ + "# **RANSAC ALGORITHM**\n", + "\n", + "---\n", + "\n", + "\n", + "\n", + "---\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1lMSM87Ak_qo" + }, + "source": [ + "**OVERVIEW :**\n", + "\n", + "---\n", + "\n", + "\n", + "\n", + "\n", + "**Random sample consensus, or** **RANSAC**, is an iterative method for estimating a mathematical model from a data set that contains outliers. The RANSAC algorithm works by identifying the outliers in a data set and estimating the desired model using data that does not contain outliers.\n", + "\n", + "\n", + "The RANdom SAmple Consensus **(RANSAC)** algorithm proposed by **Fischler and**\n", + "**Bolles** is a general parameter estimation approach designed to cope with a large\n", + "proportion of outliers in the input data. 
\n", + "\n", + "Unlike many of the common robust estimation techniques such as M-estimators and least-median squares that have been adopted by the computer vision community from the statistics literature, RANSAC was developed from within the computer vision community.\n", + "\n", + "\n", + "RANSAC is a **resampling technique** that generates candidate solutions by using\n", + "the minimum number of observations (data points) required to estimate the underlying model parameters. \n", + "\n", + "As pointed out by Fischler and Bolles, unlike conventional sampling techniques that use as much of the data as possible to obtain an initial solution and then proceed to prune outliers, RANSAC uses the smallest set possible and proceeds to enlarge this set with consistent data points.\n", + "\n", + "\n", + "\n", + "\n", + "For example, the equation of a line that best fits a set of points can be estimated using RANSAC." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Z-yXws6d8Nb9" + }, + "source": [ + "**TERMINOLOGIES**:\n", + "\n", + "---\n", + "\n", + "\n", + "\n", + "**Outliers:** \n", + "\n", + "Outliers are data which are far from other data points. These data points can be found at both ends. That is, outliers can be far below a certain point, which can be termed the minimum, or far above another point, which can be termed the maximum. One easy way to find outliers is to use a boxplot. Here is the box plot created to find outliers in the housing prices. In the boxplot given below, one can find outliers at both ends, i.e. beyond the minimum and the maximum. These are the points which can have an adverse impact on the coefficients of a regression model if not taken care of. 
In this post, you will learn about how using RANSAC algorithm will exclude outliers from training data used for training the model.\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 415 + }, + "id": "jDuGxVaW9ooY", + "outputId": "f0a75ed9-b7ea-4af3-f992-aa8f685d3b70" + }, + "source": [ + "from sklearn import datasets\n", + "import matplotlib.pyplot as plt\n", + "#\n", + "# Load the boston housing dataset\n", + "#\n", + "bhd = datasets.load_boston()\n", + "X = bhd.data\n", + "y = bhd.target\n", + "#\n", + "# Create the box plot\n", + "#\n", + "fig1, ax1 = plt.subplots()\n", + "ax1.set_title('Box plot for Housing Prices')\n", + "ax1.boxplot(y, vert=False)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'boxes': [],\n", + " 'caps': [,\n", + " ],\n", + " 'fliers': [],\n", + " 'means': [],\n", + " 'medians': [],\n", + " 'whiskers': [,\n", + " ]}" + ] + }, + "metadata": {}, + "execution_count": 4 + }, + { + "output_type": "display_data", + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAWoAAAEICAYAAAB25L6yAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAASVklEQVR4nO3de3BcZ33G8eeRZFbgJHYUCexg2RINF0duSBuXy4S2dqA0A6ZJB4bLGMK10LQydFKG4pg2Lm0IaUdAS6fjpiQTXLAh5U467RBQQuN2CrUhQIBSLuOQxCEmJCZElJiEX/84ZzdH0upmS9qfvN/PzBntOe973vc9r/Y8e3R2vXZECACQV0erBwAAmB5BDQDJEdQAkBxBDQDJEdQAkBxBDQDJEdSYM9sDtsN21zy192Tbt9j+ie03zkebrWD7121/q9XjkCTbX7e9qdXjwPwgqJcY2wdt/5/tB2zfZ/tfbPe3elxTsb3T9gdmqPYWSTdGxMkR8bcL1Wf54nLG8bY/lYi4OSKevBBt277J9s/K3/s9tj9me/U0YxmKiJsWYixYfAT10vSCiDhJ0mpJd0t6b4vHc7zWSfr6sew4X1f1S8Rw+Xt/kqSVkt49sUKbzUfbIKiXsIj4maSPSDqzvs32Ctu7bf/Q9m2232a7w3aP7Ttsv6Csd5Lt79i+qFnb5RXcFba/aPt+25+03TNF3dNtf8r2vWWbv1duP1/SpZJeUl4JfqXJvqOSNkv6u7LOk6Y6hrL+q2z/h+132/6RpJ3HMne2a7bfY/tQubzHdq3Sx74J9RtX47afZ/sb5a2aO22/udy+yfYdlX0O2n6z7a/a/rHtD9vurpS/xfZdZf+vm+0Vf0TcK+mjkjZU+vkT21+VNGa7q9z2nLK80/altr9bjvlA/a8w20+xfUP5u/uW7RdXxtf0OLH4COolzPZjJL1E0n9VNr9X0gpJT5D0m5IukvTq8uR+jaR/tP1YFVdjt0TE7mm6uKjcZ7WkhyRNdVviQ5LukHS6pBdJeoft8yLi3yS9Q9KHI+KkiHjqxB0j4jxJN6u8WoyI/53qGCq7PV3S9yQ9TtLl04x/OjskPUPS2ZKeKulpkt42y32vlvSGiDhZRViOTlP3xZLOlzQo6SxJr5IaL2KXSHqOpDMkbZrtwG33SnqhpC9XNr9M0vMlrYyIhybscklZ/jxJp6j4nf7U9nJJN0jaI+mxkl4q6e9t11/453KcWEgRwbKEFkkHJT0g6Yikn0s6JOmXy7JOSUclnVmp/wZJN1XW3yvpa5LulHTaNP3cJOmdlfUzy7Y7JQ1ICkldkvolPSzp5ErdKyRdWz7eKekDMxzTTZJeN5tjUBF035+hvZ1lG0cmLCHpjLLOdyU9r7LPb0s6WOlj34Q2q/t+vxzTKRPqbJJ0x4Tf1csr638laVf5+BpJV1TKzqj2McUc/bQ8jjslfVBSX6Wf1zR5njynfPwtSRc0afMlkm6esO0fJF023XGyLP7CFfXSdGFErJTULWlY0udtr5LUK2mZpNsqdW+T9PjK+lUqro6ujYgfzdDP7RPaWVb2UXW6pHsj4ifT9DkXszmG2zWz6yJiZXWZUH56kz5On+UYX6ji6vQ225+3/cxp6v6g8vinkk6q9F89jtkc0xvLY3l8RGyNiB/Ocv9+FS9ME62T9HTbR+qLpK2SVpXlczlOLCCCegmLiIcj4mMqrmifJekeFVfZ6yrV1qq4ApPtThVBvVvSH8zifmj10yRry7bvmVDnkKQe2yc361PFVeJcTHsMx9hmM4ea9HGofDwm6TH1gvJF8JHOI/47Ii5QcbvgE5KuO4b+75K0prJ+vJ/cmW5Obpf0S1Ns//yEF7STIuJiad6OE/OAoF7CXLhA0qmSvhkRD6s4mS63fbLtdSruT9Y/qnapihP6NZL+WtLuMryn8nLbZ5b3wt8u6SNlHw0Rcbuk/5R0he1u22dJem2lz7slDdTfDJzJLI5
hvuyV9DbbfeU93z+r9PEVSUO2zy7f/NtZ38n2o2xvtb0iIn4u6X5JvziG/q+T9Grb68v5/dPjOZgZvE/SX9h+YvmcOcv2aZKul/Qk26+wvaxcfq0c03wdJ+YBQb00fdr2AypOnsslvTIi6h9v26biivB7kvapeKPoGtvnqAi8i8owvFJFaL91mn7+SdK1Kv5875Y01T9GeZmK+9aHJH1cxT3Oz5Zl/1z+/JHtL83y+Joewyz3na2/lLRf0ldV3LP/UrlNUbyh+XZJn5X07XIMVa+QdND2/ZJ+X8XtgjmJiH9V8ebsjZK+o0feEH5wrm3NwrtUvDB8RsVz5mpJjy5vVz1XxZuIh1T8nq+UVCv3O+7jxPxwBP9xACazfZOKNwHf1+qxtAPb6yXdKqkWkz+1gTbHFTXQIrZ/t/w896kqrmQ/TUijGYIaaJ03SDqs4hMZD0u6uLXDQVbc+gCA5LiiBoDkFuQLXHp7e2NgYGAhmgaAE9KBAwfuiYi+ZmULEtQDAwPav3//QjQNACck27dNVcatDwBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqAGgOQIagBIjqA+AfX09Mj2oi3auWJR+unp6Wn11AIt0dXqAWD+3XfffYqIxetw54pF6c/2gvcBZMQVNQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHLpg5qPZAHHh3No6Usf1ADQ7ghqAEiOoAaA5AhqAEhuxqC2fY3tw7ZvXciBbNu2Td3d3bKt7u5ubdu2bSG7A9rK3r17tWHDBtlWR0fHuJ/d3d3q6OjQhg0btHfv3nH1Ozs7x22fqaxZn52dnerv71d/f39jn23btk0q6+joaIxl4rZm45w4jmqbzdab7TPV2Ovj7+/vb3wp2LJlyybNU13TLyubTxEx7SLpNyT9qqRbZ6pbX84555yYi+Hh4ejq6oqRkZEYGxuLkZGR6OrqiuHh4SiGiLlY9Dm77JRF6YbnwrGRFIODg3HhhRdGV1dXPPvZz47Ozs4YGhoKSXHuuefG4OBg7NixIwYHB2N4eDgGBwdjdHQ0jh49GqOjozE4OBh79uyJPXv2TFlWVa23e/fuWL16daxatSp2794dO3bsiK6urtixY0ejbMWKFdHX1xcjIyPR29sbK1eujBUrVkRvb29s3bo1urq6YuvWrTEwMBA7duyIvr6+6Ovra4yj2maz9dHR0Un7TDX2+vj7+vpi1apVsX379li1alX09PREb29vY57q+0lqLNdff/249Tn+nvbHVDk8VUGMD+uBhQzqWq0WIyMj47aNjIxErVbj5DwGBDWqJMXo6GjjPBsaGmqcXxdffHHUarUYHR2NoaGhRr3R0dFxbdTL63WalVVV69UfV9uoj6NeNjAwEAMDAxERjcf1pVq/3ka1fr2Pep1m69V2Zxp7ff+BgYFx81IfZ3Uc9fmd+NxMG9SSXi9pv6T9a9eunesAY2xsbNy2sbGxca9MLHNbFtUiBjXLsS1Hjx4NqTj
POjo6GufXkSNHGuUdHR2NekePHh039/Xyep1mZVXVevXH1Tbq46iXdXR0hO1Gfdthe1L9at1qn9U6zdYjYtI+U429OobqvEycg/p+UnElXVW/sp4LTRPU8/Z91BFxlaSrJGnjxo0xl31rtZp27dqlSy65pLFt165dqtVqevDBBxf3u5VPACfyP3DguTB3trVv377GebZ+/frG+bV9+3bVajXt27dP69evb9Tbt2+fNm/e3GijXl5/PFVZXb2tzZs3Nx7Xt0tqjKO+/9q1axv7Vh9L0vLlyxv1632NjY1N6q/a5sR1SVq3bt2kuWk29vr+Y2Nj4+alPrbly5dP2m/Lli3jnptbtmyZ1OZxmSrBY45X1NWFe9Sttehzxq2P1CTuUXOPeh6COqII6/o96VqtFsPDw41JwNwQ1KiSFHv27GkEs+1xP2u1WtiOoaGhRvjU63d0dIzbPlNZVbXemjVrYs2aNY19hoeHJ5XZboxl4rZm45w4jmqbzdab7TPV2OvjX7NmTSN0u7q6Js1TdY4nLsfwe5oyqB0z/Clpe6+kTZJ6Jd0t6bKIuHq6fTZu3Bj79++fxfX8zGzz5+4cLfqc7Vwh7fzxgnfDc+HYMG9Lg+0DEbGxWdmM96gj4mXzPyQAwGzxLxMBIDmCGgCSI6gBILn0Qc2bIMDx4Rxa+tIHNQC0O4IaAJIjqAEgOYIaAJIjqAEgOYIaAJKbt685RS6L+VWncdkpi9LfqaeeuuB9ABkR1CegVnxuNnYuepdA2+DWBwAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAkR1ADQHIENQAk54iY/0btH0q6bd4bXly9ku5p9SCSYC7GYz7GYz4ecTxzsS4i+poVLEhQnwhs74+Ija0eRwbMxXjMx3jMxyMWai649QEAyRHUAJAcQT21q1o9gESYi/GYj/GYj0csyFxwjxoAkuOKGgCSI6gBILm2D2rb19g+bPvWyrYe2zfY/nb589RWjnEx2e63faPtb9j+uu03ldvbbk5sd9v+ou2vlHPx5+X2QdtfsP0d2x+2/ahWj3Ux2e60/WXb15frbTsftg/a/prtW2zvL7fN+7nS9kEt6VpJ50/Y9lZJn4uIJ0r6XLneLh6S9McRcaakZ0j6Q9tnqj3n5EFJ50XEUyWdLel828+QdKWkd0fEGZLuk/TaFo6xFd4k6ZuV9Xafj80RcXbl89Pzfq60fVBHxL9LunfC5gskvb98/H5JFy7qoFooIu6KiC+Vj3+i4oR8vNpwTqLwQLm6rFxC0nmSPlJub4u5qLO9RtLzJb2vXLfaeD6mMO/nStsH9RQeFxF3lY9/IOlxrRxMq9gekPQrkr6gNp2T8s/8WyQdlnSDpO9KOhIRD5VV7lDxQtYu3iPpLZJ+Ua6fpvaej5D0GdsHbL++3Dbv50rX8TZwoouIsN12n2G0fZKkj0r6o4i4v7hwKrTTnETEw5LOtr1S0sclPaXFQ2oZ21skHY6IA7Y3tXo8STwrIu6
0/VhJN9j+n2rhfJ0rXFE3d7ft1ZJU/jzc4vEsKtvLVIT0ByPiY+Xmtp6TiDgi6UZJz5S00nb9ImeNpDtbNrDFda6k37F9UNKHVNzy+Bu173woIu4sfx5W8UL+NC3AuUJQN/cpSa8sH79S0idbOJZFVd5zvFrSNyPiXZWitpsT233llbRsP1rSb6m4Z3+jpBeV1dpiLiQpIrZHxJqIGJD0UkmjEbFVbToftpfbPrn+WNJzJd2qBThX2v5fJtreK2mTiq8nvFvSZZI+Iek6SWtVfF3riyNi4huOJyTbz5J0s6Sv6ZH7kJequE/dVnNi+ywVbwZ1qriouS4i3m77CSquKHskfVnSyyPiwdaNdPGVtz7eHBFb2nU+yuP+eLnaJWlPRFxu+zTN87nS9kENANlx6wMAkiOoASA5ghoAkiOoASA5ghoAkiOoASA5ghoAkvt/rbt+5q3o990AAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "C5iMDtI992rn" + }, + "source": [ + "**Inliers**: \n", + "\n", + "The data which are not outliers can be said as the inliers. Inliers represent the dataset which are used for training the model using algorithm specified as base_estimator. Looking the boxplot, one can say that the inliers data belong to dataset lying between minimum and maximum point." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "EKhz33W0-MSv" + }, + "source": [ + "**Mean Absolute Deviation (MAD)**: \n", + "\n", + "Median absolute deviation is measure of variability of observations from the median value of the observation. This will be used to select the inlier threshold to classify whether a data point is inlier or outlier. The choice of right value for MAD is problem specific and the challenge is to select the most appropriate value for MAD to have the model which generalizes well with unseen dataset." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "97MzsQhqlUGS" + }, + "source": [ + "**ALGORITHM** : \n", + "\n", + "---\n", + "\n", + "\n", + "\n", + "\n", + "1: Select randomly the minimum number of points required to determine the model\n", + "parameters.\n", + "\n", + "2: Solve for the parameters of the model.\n", + "\n", + "3: Determine how many points from the set of all points fit with a predefined tolerance.\n", + "\n", + "4: If the fraction of the number of inliers over the total number points in the set exceeds a predefined threshold τ , re-estimate the model parameters using all the identified inliers and terminate.\n", + "\n", + "5: Otherwise, repeat steps 1 through 4 (maximum of N times)." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HcmQWiIOqioX" + }, + "source": [ + "**ADVANTAGES AND DISADVANTAGES :**\n", + "\n", + "---\n", + "\n", + "An advantage of RANSAC is its ability to do robust estimation of the model parameters, i.e., it can estimate the parameters with a high degree of accuracy even when a significant number of outliers are present in the data set.\n", + "\n", + "A disadvantage of RANSAC is that there is no upper bound on the time it takes to compute these parameters (except exhaustion). \n", + " \n", + "When the number of iterations computed is limited the solution obtained may not be optimal, and it may not even be one that fits the data in a good way. In this way RANSAC offers a trade-off; by computing a greater number of iterations the probability of a reasonable model being produced is increased. \n", + " \n", + "Moreover, RANSAC is not always able to find the optimal set even for moderately contaminated sets and it usually performs badly when the number of inliers is less than 50%.\n", + " \n", + "Optimal RANSAC was proposed to handle both these problems and is capable of finding the optimal set for heavily contaminated sets, even for an inlier ratio under 5%. \n", + "\n", + "Another disadvantage of RANSAC is that it requires the setting of problem-specific thresholds.\n", + "\n", + "RANSAC can only estimate one model for a particular data set. As for any one-model approach when two (or more) model instances exist, RANSAC may fail to find either one. \n", + "\n", + "The Hough transform is one alternative robust estimation technique that may be useful when more than one model instance is present. 
\n", + "\n", + "Another approach for multi model fitting is known as PEARL, which combines model sampling from data points as in RANSAC with iterative re-estimation of inliers and the multi-model fitting being formulated as an optimization problem with a global energy function describing the quality of the overall solution.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qrYx65JJwxtb" + }, + "source": [ + "**IMPLEMENTING RANSAC ALGORITHM IN PYTHON** :\n", + "\n", + "---\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SbZjhDUewrGE" + }, + "source": [ + "**Robust linear model estimation using RANSAC**\n", + "\n", + "---\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 313 + }, + "id": "ZKQeDZ2fuJLd", + "outputId": "b6a719a0-809e-448f-e2ce-4f028a8e6715" + }, + "source": [ + "\n", + "import numpy as np\n", + "from matplotlib import pyplot as plt\n", + "\n", + "from sklearn import linear_model, datasets\n", + "\n", + "\n", + "n_samples = 1000\n", + "n_outliers = 50\n", + "\n", + "\n", + "X, y, coef = datasets.make_regression(\n", + " n_samples=n_samples,\n", + " n_features=1,\n", + " n_informative=1,\n", + " noise=10,\n", + " coef=True,\n", + " random_state=0,\n", + ")\n", + "\n", + "# Add outlier data\n", + "np.random.seed(0)\n", + "X[:n_outliers] = 3 + 0.5 * np.random.normal(size=(n_outliers, 1))\n", + "y[:n_outliers] = -3 + 10 * np.random.normal(size=n_outliers)\n", + "\n", + "# Fit line using all data\n", + "lr = linear_model.LinearRegression()\n", + "lr.fit(X, y)\n", + "\n", + "# Robustly fit linear model with RANSAC algorithm\n", + "ransac = linear_model.RANSACRegressor()\n", + "ransac.fit(X, y)\n", + "inlier_mask = ransac.inlier_mask_\n", + "outlier_mask = np.logical_not(inlier_mask)\n", + "\n", + "# Predict data of estimated models\n", + "line_X = np.arange(X.min(), X.max())[:, np.newaxis]\n", + "line_y = lr.predict(line_X)\n", + 
"line_y_ransac = ransac.predict(line_X)\n", + "\n", + "# Compare estimated coefficients\n", + "print(\"Estimated coefficients (true, linear regression, RANSAC):\")\n", + "print(coef, lr.coef_, ransac.estimator_.coef_)\n", + "\n", + "lw = 2\n", + "plt.scatter(\n", + " X[inlier_mask], y[inlier_mask], color=\"yellowgreen\", marker=\".\", label=\"Inliers\"\n", + ")\n", + "plt.scatter(\n", + " X[outlier_mask], y[outlier_mask], color=\"gold\", marker=\".\", label=\"Outliers\"\n", + ")\n", + "plt.plot(line_X, line_y, color=\"navy\", linewidth=lw, label=\"Linear regressor\")\n", + "plt.plot(\n", + " line_X,\n", + " line_y_ransac,\n", + " color=\"cornflowerblue\",\n", + " linewidth=lw,\n", + " label=\"RANSAC regressor\",\n", + ")\n", + "plt.legend(loc=\"lower right\")\n", + "plt.xlabel(\"Input\")\n", + "plt.ylabel(\"Response\")\n", + "plt.show()" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Estimated coefficients (true, linear regression, RANSAC):\n", + "82.1903908407869 [54.17236387] [82.08533159]\n" + ] + }, + { + "output_type": "display_data", + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + } + } + ] + } + ] +} \ No newline at end of file