diff --git a/titanic.ipynb b/titanic.ipynb new file mode 100644 index 0000000..a83b5c5 --- /dev/null +++ b/titanic.ipynb @@ -0,0 +1,600 @@ +{ + "metadata": { + "name": "", + "signature": "sha256:472863789a96bfe7660405b251860e505a7b104b48393582dbef1747620a2a87" + }, + "nbformat": 3, + "nbformat_minor": 0, + "worksheets": [ + { + "cells": [ + { + "cell_type": "heading", + "level": 1, + "metadata": {}, + "source": [ + "Titanic Analysis" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sb" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 661 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "%matplotlib inline" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 662 + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Reading Data" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "train = pd.read_csv(\"./train.csv\")\n", + "train.info()" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "Int64Index: 891 entries, 0 to 890\n", + "Data columns (total 12 columns):\n", + "PassengerId 891 non-null int64\n", + "Survived 891 non-null int64\n", + "Pclass 891 non-null int64\n", + "Name 891 non-null object\n", + "Sex 891 non-null object\n", + "Age 714 non-null float64\n", + "SibSp 891 non-null int64\n", + "Parch 891 non-null int64\n", + "Ticket 891 non-null object\n", + "Fare 891 non-null float64\n", + "Cabin 204 non-null object\n", + "Embarked 889 non-null object\n", + "dtypes: float64(2), int64(5), object(5)\n", + "memory usage: 90.5+ KB\n" + ] + } + ], + "prompt_number": 663 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "test = pd.read_csv(\"./test.csv\")\n", + "test.info()" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "\n", + "Int64Index: 418 entries, 0 to 417\n", + "Data columns (total 11 columns):\n", + "PassengerId 418 non-null int64\n", + "Pclass 418 non-null int64\n", + "Name 418 non-null object\n", + "Sex 418 non-null object\n", + "Age 332 non-null float64\n", + "SibSp 418 non-null int64\n", + "Parch 418 non-null int64\n", + "Ticket 418 non-null object\n", + "Fare 417 non-null float64\n", + "Cabin 91 non-null object\n", + "Embarked 418 non-null object\n", + "dtypes: float64(2), int64(4), object(5)\n", + "memory usage: 39.2+ KB\n" + ] + } + ], + "prompt_number": 664 + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Reducing Data" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "train_data = train[['Survived', \n", + " 'Pclass',\n", + " 'Sex']]\n", + "\n" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 665 + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Exploring Variables" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "train_data.Pclass.unique()" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 666, + "text": [ + "array([3, 1, 2])" + ] + } + ], + "prompt_number": 666 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "train_data.Sex.unique()" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 667, + "text": [ + "array(['male', 'female'], dtype=object)" + ] + } + ], + "prompt_number": 667 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "def print_hist(data, key, title, axis_labels, xticks):\n", + " ax = data[key].plot(kind='hist', alpha=0.5, title=title, xticks=xticks)\n", + " if axis_labels:\n", + " ax.set_xticklabels(axis_labels, rotation=45) " + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 668 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "print_hist(train_data,'Survived', \"Survived or Not\", [\"No\", \"Yes\"], [0,1])" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "display_data", + "png": "iVBORw0KGgoAAAANSUhEUgAAAYwAAAETCAYAAAAlCTHcAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFTBJREFUeJzt3XmUnXV9x/H3TJIJJMzEbZCDC2qtX6k1yKIgyiqCeFSs\nVlu1WnIquKSI1rXBalvBaBVsUxUxEXE72Ipa0YiiEVlyRMSVCP0KKtWK1VGyjDOQSTK3fzzP6CVO\nMr/J5Mm9Y96vc+bk3me73/tHns/9Lc/z9LRaLSRJmkpvpwuQJM0OBoYkqYiBIUkqYmBIkooYGJKk\nIgaGJKnI3E4XIE1HRBwFvBW4L9UPnp8Cr8nMm3fT8V8C3Csz374bjnUE8InMfOjMK/vtMceB8zLz\nH9qW/TmwNDNPmGLfFwPzMvPC3VWP9i62MDRrRMR84HPA32XmIZn5aOBjwBUR0bM7PiMzL9odYdGw\nV0XEMbuw3xOBBbu7GO09bGFoNlkALAL6JxZk5sciYiMwNyKeAPx7HSRExPET7yPiH4HHAwcA64Bj\ngD/LzG/W234c+Gq9/r7A5cD5mbm4Xn8v4EfAQ4H9gH8HHgzMAz6emcvr7V4GvBLYCHx/R18kIp4J\nvAmYA2yiCsFvbFfndzPzRZPsfg7w0Yg4JDM3bHfcecAFwInANuDrwKuAJwNPB06KiFFbGdoVtjA0\na2TmeuB1wBci4ocR8eGIWAKsycwtBYd4EHBoZr4AuBg4HSAi7g2cRNVaaQGtzPwSsF9EHF7v+zzg\nc5m5EfgIcHFmHgEcCTw5Ip4TEY8B3gwck5mPA0YmKyIiHglcCDwrMw+hCo7PRMREEE7UOVlYUH/+\ntcD7J1n3RqqwWQwcQvV//B2Z+WmqELzAsNCuMjA0q2Tmu4D9gVcAPwdeD3w7IgYKdr8+M8fr1xcD\nz61/kT8PuDwzh4Ge+g/gA9ShAiwBVkXEQuA44C0R8W3ga8ADqU7OJwJfzMxf1vtctIM6TgS+nJm3\n19/pKuCXwOFUgdVe5468DDg8Iv6m3mfCU4D3Zea2zGxRtYRObVu/W7rutHcyMDRrRMQTIuK1mTmS\nmasz8/XAo4BxqhZCi3ueEPu2O8Rvf/Fn5k+AbwFPowqFlfWq9pPvJVShcgiwKDOvoepCAnh8Zh6a\nmYcCRwPL633b/09t28FXaQ+lCb1U3Vv3qHNH6nB7AfBO4BHbHaf92HPajgv3/H7StBgYmk2GgHMi\n4ti2ZQ8AFgI31esfHBGD9SD4M6c43krgDcC+mfm1etlvT7aZ+TOqMYCL6m3JzE3A9cCrASJiEVX3\n0DOALwEnR8QD6kOcvoPP/Uq93UPrY5xI1Uq5nmm0ADLzeuB8qm6wiSD4IvDSiJgbEb3AUuDKet1W\nfj9EpWIGhmaNzPwBVQi8JSJ+HBHfBz4OnJGZt9ZTay8CbqTqKrqD351IW/z+r+vLgYOoup7YwXYr\ngccAH2pb9nzgqIj4HlWgXJqZl2bmOqoxljUR8Q2qIPu9X/SZeQvwcuBTEXET1TThp9ethsnqbLf9\nuvOogmbCucD/Ad8BbqZqYZxdr7sCeEVEvH4nx5d2qMfbm0uSSjQ6rTYi/p5qKt884N3AWqp+4XGq\nqY1LM7MVEWcAZ1I1mc/NzNVN1iVJmr7GuqTqOfCPz8yjgeOBh1H1ty7LzGOp+mpPi4gDgLOoBg5P\nAZZHhP2sktRlmhzDOBm4KSL+C/gsVX/x4fVME6j6U08CHguszcwt9YDibVRzyCVJXaTJLqlBqguQ\nnkbVuvgs95wBMkx11e4A1VWx2y+XJHWRJgPjV8AtmbkV+EFE3E01BXLCALCB6rYI/W3L+4H1Oztw\nq9Vq9fR4/ZEkTdOMTpxNBsZ1VNP5LoiIA6nuA7QmIo7LzKuprj5dA9wAnFffWG4f4GCqAfEd6unp\nYWhouMHSJekPz+Bg/9Qb7USj02oj4u3ACVRjJX8P3E41r72Pao74GfUsqRdTzZLqpbp186enOHTL\nwJCk6Rkc7J9RC2O2XodhYEjSNM00MLzSW5JUxMCQJBUxMCRJRQwMSVIRA0OSVMTAkCQVMTAkSUUM\nDElSEQNDklTEwJAkFTEwJElFDAxJUhEDQ5JUxMCQJBVp8gFKjfnfn/2cO+74dUdr6OmBhxx0UEdr\nkKQ9aVYGxlXX38LGsQUdrWF0wy9YYmBI2ovMysCYO28u83v27WgNW+b1dfTzJWlPcwxDklTEwJAk\nFTEwJElFDAxJUhEDQ5JUxMCQJBUxMCRJRQwMSVIRA0OSVMTAkCQVMTAkSUUMDElSkcZvPhgR3wI2\n1m9/BCwHLgHGgXXA0sxsRcQZwJnAVuDczFzddG2SpHKNBkZE7AOQmSe0LbscWJaZ10TEhcBpEXE9\ncBZwOLAvcF1EfCkzx5qsT5JUrukWxiHAgoj4Yv1Z5wCHZeY19forgJOBbcDazNwCbImI24DFwI0N\n1ydJKtT0GMYI8I7MPAV4KfCx7dYPA4uAAX7XbdW+XJLUJZpuYfwAuA0gM2+NiF8Dh7atHwA2AJuA\n/rbl/cD6nR144cL5u7fSaWrd3cfgYP/UG0rSH4imA2MJVdfS0og4kCoIroyI4zLzauBUYA1wA3Be\nRMwH9gEOphoQ36GRkc2NFj6VkdExhoaGO1qDJE3HTH/kNh0YHwA+GBETYxZLgF8DKyOiD7gZuKye\nJbUCuJaqm2yZA96S1F0aDYzM3Aq8cJJVx0+y7SpgVZP1SJJ2nRfuSZKKGBiSpCIGhiSpiIEhSSpi\nYEiSihgYkqQiBoYkqYiBIUkqYmBIkooYGJKkIgaGJKmIgSFJKmJgSJKKGBiSpCIGhiSpiIEhSSpi\nYEiSihgYkqQiBoYkqYiBIUkqYmBIkooYGJKkIgaGJKmIgSFJKmJgSJKKGBiSpCIGhiSpiIEhSSpi\nYEiSisxt+gMiYn/gm8CTgHHgkvrfdcDSzGxFxBnAmcBW4NzMXN10XZKk6Wm0hRER84CLgBGgB7gA\nWJaZx9bvT4uIA4CzgKOBU4DlEdHXZF2SpOlrukvqHcCFwM/r94dl5jX16yuAk4DHAmszc0tmbgJu\nAxY3XJckaZoaC4yIOB0Yyswr60U99d+EYWARMABsnGS5JKmLNDmGsQRoRcRJwGOADwGDbesHgA3A\nJqC/bXk/sH6qgy9cOH/3VboLWnf3MTjYP/WGkvQHorHAyMzjJl5HxFXAS4F3RMRxmXk1cCqwBrgB\nOC8i5gP7AAdTDYjv1MjI5kbqLjUyOsbQ0HBHa5Ck6Zjpj9zGZ0m1aQGvBlbWg9o3A5fVs6RWANdS\ndZEty8yxPViXJKnAHgmMzDyh7e3xk6xfBazaE7VIknaNF+5JkooYGJKkIgaGJKmIgSFJKmJgSJKK\nGBiSpCIGhiSpiIEhSSpiYEiSihgYkqQiBoYkqYiBIUkqYmBIkooYGJKkIgaGJKmIgSFJKmJgSJKK\nTPnEvfpZ268BAngFcDaw3MeoSlKZbdu2sXHjhk6XsUee6f0eYAg4HNgKPBz4APDCGX2yJO0lNm7c\nwGVfvokF+w10rIbR32zinHjIjI5REhiHZ+ahEfGUzPxNRLwIWDejT5WkvcyC/QbYr/9enS5jRkrG\nMMYjoq/t/f2A8YbqkSR1qZLA+Dfgy8ABEfFvwDeBf220KklS15mySyozPxwR3wSOB+YAT8vM7zVd\nmCSpu5ROq30McCBwcf1akrSXmTIwIuLtwFOBZwHzgCURcUHThUmSuktJC+MUqim0d2fmeuDJwKmN\nViVJ6jolgbFtu/fzJ1kmSfoDVxIYnwA+DtwnIl4FXAtc2mhVkqSuU3Lh3juBk4CfAA8C3pSZn2u0\nKklS1ykJjBsy8zDgC9M9eETMAVYCjwBawEuBzcAlVBf/rQOWZmYrIs4AzqS6/ci5mbl6up8nSWpO\nSZfULyLi2PomhNP1NGA8M58IvBF4K3A+sCwzjwV6gNMi4gDgLOBoqkH25dtdXS5J6rCSFsYRwFcB\nImJiWSsz50y1Y2Z+JiImuq8eAqwHTsrMa+plVwAnUw2ir83MLcCWiLgNWAzcWPY1JElNK7nSe3Am\nH5CZ2yLiEuCZwHOopuVOGAYWAQPAxkmWS5K6RMnzMN5MNf4woQXcBdxSOs6QmadHxP2BG4B92lYN\nABuATUD7jdr7qVojkqQuUdIl9UfAH1NNpe0Bnk11gn9iRByXma/b0Y4R8ULggZm5nCpktgE31vtd\nTXUB4BqqIDmvHifZBziYKW6hvnDhrgyp7D6tu/tm/DASSXuH3t4xFizo6+h5a3zbzIeFSwLjkcAx\nmbkZICIuBK7JzKMi4nvADgMDuAy4JCKuprqtyNnAfwMr60Htm4HL6llSK6iu8eilGhTf6RP9RkY2\nF5TenJHRMYaGhjtag6TZ4c47hxkdHaN3TufOW6OjM39Iaklg3IvqZD/xTecD+9Wve3a2Y2beBfzF\nJKuOn2TbVcCqgnokSR1QEhjvpupG+izV7c2fCqyIiFcC3uZckvYSU16HkZkrgOcCdwC3A8/OzPcC\nq4EljVYnSeoaJS0MgEdTPZp1OdVtzr+fmbc2VpUkqev4PAxJUhGfhyFJKuLzMCRJRXwehiSpSMmg\n92rgZ8DDgCfi8zAkaa+0w8CIiP2prtT+U+BWqm6oE4F9I+K6zNywZ0qUJHWDnXVJvRu4Drh/Zh6Z\nmUcC9we+C/zrnihOktQ9dtYltTgzn9u+IDPHIuIc4DvNliVJ6jY7a2HcNdnCzBzHWVKStNcpmSUl\nSdJOu6QeFRE/3sG6A5soRpLUvXYWGI/YY1VIkrreDgMjM2/fg3VIkrqcYxiSpCIGhiSpiIEhSSpi\nYEiSihgYkqQiBoYkqYiBIUkqYmBIkooYGJKkIgaGJKmIgSFJKmJgSJKKGBiSpCI7u735jETEPOBi\n4CBgPnAucAtwCTAOrAOWZmYrIs4AzgS2Audm5uqm6pIk7ZomWxgvAIYy81jgKcB7gPOBZfWyHuC0\niDgAOAs4GjgFWB4RfQ3WJUnaBY21MIBPAJfVr3uBLcBhmXlNvewK4GSq54OvzcwtwJaIuA1YDNzY\nYG2SpGlqLDAycwQgIvqpwuONwDvbNhkGFgEDwMZJlkuSukijg94R8SDgK8CHM/NSqrGLCQPABmAT\n0N+2vB9Y32RdkqTpa3LQ+/7AlcDLM/OqevG3I+K4zLwaOBVYA9wAnBcR84F9gIOpBsR3auHC+c0U\nXqh1dx+Dg/1Tbyhpr9fbO8aCBX0dPW+Nb5v50HCTYxjLqLqW3hQRb6qXnQ2sqAe1bwYuq2dJrQCu\npWrxLMvMsakOPjKyuaGyy4yMjjE0NNzRGiTNDnfeOczo6Bi9czp33hodnfK0OqUmxzDOpgqI7R0/\nybargFVN1SJJmjkv3JMkFTEwJElFDAxJUhEDQ5JUxMCQJBUxMCRJRQwMSVIRA0OSVMTAkCQVMTAk\nSUUMDElSEQNDklTEwJAkFTEwJElFDAxJUhEDQ5JUxMCQJBUxMCRJRQwMSVIRA0OSVMTAkCQVMTAk\nSUUMDElSEQNDklTEwJAkFTEwJElFDAxJUhEDQ5JUxMCQJBUxMCRJReY2/QERcSTwtsw8ISIeDlwC\njAPrgKWZ2YqIM4Azga3AuZm5uum6JEnT02gLIyJeB6wE5teLLgCWZeaxQA9wWkQcAJwFHA2cAiyP\niL4m65IkTV/TXVK3Ac+iCgeAwzLzmvr1FcBJwGOBtZm5JTM31fssbrguSdI0NRoYmfkpqm6mCT1t\nr4eBRcAAsHGS5ZKkLtL4GMZ2xtteDwAbgE1Af9vyfmD9VAdauHD+VJs0qnV3H4OD/VNvKGmv19s7\nxoIFfR09b41vm3lP/54OjG9HxHGZeTVwKrAGuAE4LyLmA/sAB1MNiO/UyMjmRgud8vNHxxgaGu5o\nDZJmhzvvHGZ0dIzeOZ07b42Ojs34GHsqMFr1v68GVtaD2jcDl9WzpFYA11J1kS3LzJl/M0nSbtV4\nYGTm7VQzoMjMW4HjJ9lmFbCq6VokSbvOC/ckSUUMDElSEQNDklTEwJAkFTEwJElFDAxJUhEDQ5JU\nxMCQJBUxMCRJRQwMSVIRA0OSVMTAkCQVMTAkSUUMDElSEQNDklTEwJAkFTEwJElFDAxJUhEDQ5JU\nxMCQJBUxMCRJRQwMSVIRA0OSVMTAkCQVMTAkSUUMDElSEQNDklTEwJAkFTEwJElF5na6gAkR0Qu8\nF1gMbAZenJk/7GxVkqQJ3dTCeCbQl5lHA28Azu9wPZKkNt0UGE8AvgCQmV8HjuhsOZKkdl3TJQUM\nAJva3m+LiN7MHO9UQVO5885fd7oESbPA+vXrGf3Npqk3bNDu+PxuCoxNQH/b+x2GxfOecWzPnilp\nZw7pdAGSZo2HcNRRh3a6iBnrpi6ptcBTASLiKOB7nS1HktSum1oYnwaeHBFr6/dLOlmMJOmeelqt\nVqdrkCTNAt3UJSVJ6mIGhiSpiIEhSSpiYEiSiszawIiILrgWQ5L2HrMuMCJiDkBmOr1LkqZh4vwZ\nEX0RMW+6+8+qabURMSczt0XE/lQ3K0zgp5n5ow6XJkldre38+SDgH4DLga9m5m9KjzFrAiMiDszM\nOyLiQKqL/K4H7gPcDnw0M7OT9UlSt4uI+wEfqf9+BWwBfpmZ3y/Zf1Z0SUXE3wJ/HREPB/4K+Fhm\nng28DXgg8EedrE+SulX9rKEJxwAHALcA/wgcBbw3IvYtOdasCAzgVuB+wJOAg4BHANSpOAT8SedK\nk6TuVHdDjUdEfz1+8XXgfVT37XsJcCXwY6DoruCzqUvqKOC5wI+AoPqC3wBeAzzbp/NJUqWeRfrq\nzHxn3Y3/EeBq4FDgTOChwF/W78/KzHUlx+3awIiIzwM9wE3AdVRNqEOpwmIT8HCqPrj/zMxbOlWn\nJHWbemLQFcDzgAuBdwHrgfcA/wFcChwOfGc6P7a76W61v1U3na4CTgcWUX3RNwK/BB4NDFK1LD6Y\nmXd1qExJ6lZ3A/9Tv/4iMAy8BTgLeCQwNzM/Od2DduUYRmZuA1YA/0Q1fnF5Zj6OasD7+cBrgSsN\nC0n6nYh4FEBmbqLqmTmSamD7Q8A/Ay1gKbB5V47ftV1SABGxgGrc4qnA+zPzyx0uSZK6UkT8C3A0\nVVf+r4AFwOeBlVTdUN+gmjh05q5243d1YADU072eDbwAeGtmXtvhkiSp60TEvTNzfUQ8Ftifagrt\nw4APZ+bnImI+MJCZQ7v6GV05htEuM++KiE8CW6mmf0mSft+G+t8bM7MVEV8BjgDeEBH3zcwPUV2G\nsMu6voUxISJ6vH+UJJWru/UfB9yamT+b6fFmTWBIkqZvd/7YNjAkSUW6clqtJKn7GBiSpCIGhiSp\niIEhSSpiYEiSihgYkqQiBoYkqYiBIUkq8v/kxOGyE+McawAAAABJRU5ErkJggg==\n", + "text": [ + "" + ] + } + ], + "prompt_number": 669 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "print_hist(train_data,'Pclass', \"Class\", [1,2,3], [1,2,3])" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "display_data", + "png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAELCAYAAADDZxFQAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAEXFJREFUeJzt3X1sXXd9x/G37dYOSe2wtqYZD1vYOr5UGxmkrVo61oau\n0BUximBijwy6ka4s6wpiQ5CyMraWMKCIRhRWEiAwIZjIeFwWqKhYH4IgtOOplH1pGR0SrcCtndiJ\nS5zY3h/nejPhF/u65PhcJ++XFOXe3zm+93ul4/vx7+Gc0zU9PY0kSYfrbroASVJnMiAkSUUGhCSp\nyICQJBUZEJKkIgNCklR0QtMFSJ0qInqAq4A/oPpd6QU+A7wRuAn4ZmZe31yFUr3sQUhH9h7gHODC\nzHwGcDYQwBbAE4h0zLMHIRVExJOBPwRWZeY+gMwcj4grgPOAF8za90+By6l6GCcDb8nMf4qIVcCH\ngFNau+7IzGuO1L4Yn0taCHsQUtla4Fsz4TAjM3+YmZ9oPZ2OiBXAK4BLMnMt8PvAW1vb1wPfzcwz\ngd8ETo+IgUL7r0REf/0fSVoYexBS2STz/wHVlZn7I+L5wO9ExOnA04EVre07gX+PiF8APg+8PjNH\nI+Lw9tdl5lg9H0N69OxBSGVfAc6IiJNmN0bEEyJiB/AYqh7EE4GvA08CbgfeAHQBZOadwJOB9wKr\ngd0R8cwjtS/Gh5IWosuL9UllEXET8HPAn2XmWGt46J+BEapJ6ruB+4BrWsNFRMTVwD9Q9c7fDJCZ\nr4uILuAW4IPAU6l6H7Pbt2Xmhxb1A0rzMCCkI2gtc/1b4MXAIaAP+ATwd1R//X+TaqXTR6m+9H8E\nfAp4NbAOGKUKhCcAB4CvAa+kmsj+qfbMPLgoH0xqkwEhSSqqfZI6Iv4T2Nt6+t/AJmAbMEXVRd+Q\nmdMRsZ5qqeAh4NrM3FF3bZKkI6u1BxERy4Avtpb/zbR9Gnh7Zt4WEe8BPgd8CbgZOJNq8u8O4KzM\nnKitOEnSnOruQfw6sDwiPtd6r6uBtZl5W2v7TuC5VEsKd7XGYA9GxH3AGuDOmuuTJB1B3ctc9wNv\ny8yLgSuADx+2fQxYCQzw/8NQs9slSQ2puwfxHaplgGTmvRHxMPCMWdsHgD1Uqz1mn0naT7WUsGh6\nenq6q6vr6FcrSce2BX1x1h0Ql1ENFW2IiMdTffHfHBEXZOatwCVUa8B3A9dFRB+wDDiDagK7qKur\ni6EhTzxVZxoc7Pf4VEcaHFzYFV3qDoj3AR+IiJk5h8uAh4EtEdEL3ANsb61i2kx1Jmo3sNEJaklq\n1lI9D2Lav9DUqexB6HCTk5Ps3bun6TKIWN1RQ0ySdNzbu3cP2z//TZafNNBYDeP7Rrk6Vi/oZwwI\nSVoEy08a4KT+xzZdxoJ4NVdJUpEBIUkqMiAkSUUGhCSpyICQJBUZEJKkIgNCklRkQEiSigwISVKR\nASFJKjIgJElFBoQkqciAkCQVGRCSpCIDQpJUZEBIkooMCElSkQEhSSoyICRJRQaEJKnIgJAkFRkQ\nkqQiA0KSVGRASJKKDAhJUpEBIUkqMiAkSUUGhCSpyICQJBUZEJKkIgNCklRkQEiSigwISVKRASFJ\nKjqh7jeIiMcBdwG/BUwB21r/3w1syMzpiFgPXA4cAq7NzB111yVJmlutPYiIOBG4CdgPdAHvADZm\n5vmt55dGxCrgSuA84GJgU0T01lmXJGl+dQ8xvQ14D/Bg6/nazLyt9XgncBFwNrArMw9m5ihwH7Cm\n5rokSfOoLSAi4uXAUGbe3Grqav2bMQasBAaAvYV2SVKD6pyDuAyYjoiLgKcDHwQGZ20fAPYAo0D/\nrPZ+YKTGuiRJbagtIDLzgpnHEfEF4ArgbRFxQWbeClwC3ALsBq6LiD5gGXAG1QT2nAYH++fbRWqM\nx6dm6+6eYPnyXlas6GushqnJhU/t1r6KaZZp4DXAltYk9D3A9tYqps3A7VRDXhszc2K+FxsaGqu1\nWOnRGhzs9/jUTxgeHmN8fILungON1TA+Pu/X6k9ZlIDIzGfPerqusH0rsHUxapEktccT5SRJRQaE\nJKnIgJAkFRkQkqQiA0KSVGRASJKKDAhJUpEBIUkqMiAkSUUGhCSpyICQJBUZEJKkIgNCklRkQEiS\nigwISVKRASFJKjIgJElFBoQkqciAkCQVGRCSpCIDQpJUZEBIkooMCElSkQEhSSoyICRJRQaEJKnI\ngJAkFRkQkqQiA0KSVGRASJKKDAhJUpEBIUkqMiAkSUUGhCSpyICQJBUZEJKkIgNCklR0Qp0vHhE9\nwBbgKcA0cAVwANgGTAF3Axsyczoi1gOXA4eAazNzR521SZLmVncP4vnAVGY+C3gD8GbgemBjZp4P\ndAGXRsQq4ErgPOBiYFNE9NZcmyRpDrUGRGZ+Cvjz1tPVwAhwZmbe1mrbCVwEnA3sysyDmTkK3Aes\nqbM2SdLcap+DyMzJiNgG3AB8mKrXMGMMWAkMAHsL7ZKkhtQ6BzEjM18eEacBu4FlszYNAHuAUaB/\nVns/VW/jiAYH++faLDXK41OzdXdPsHx5LytW9DVWw9Tkwkft656kfinwxMzcBDwCTAJ3RsQFmXkr\ncAlwC1VwXBcRfVQBcgbVBPYRDQ2N1Vm69KgNDvZ7fOonDA+PMT4+QXfPgcZqGB+fWPDP1N2D2A5s\ni4hbgROBq4D/Ara0JqHvAba3VjFtBm6nGvbamJkL/zSSpKOm1oDIzEeA3ytsWlfYdyuwtc56JEnt\n80Q5SVKRASFJKpp3iKk1cfzXQAB/RTWPsMk5Akk6trXTg7gROAk4k+oyGKcD76uzKElS89oJiDMz\n8/XARGbuA/4EWFtvWZKkprUTEFOHXRfpVKoL7UmSjmHtBMQNwOeBVRFxA3AX8M5aq5IkNW7eSerM\n/FBE3EV17kIP8PzM/EbdhUmSmtXuMtenA48H3t96LEk6xs0bEBHxj8DzgBdRXS7jsoh4R92FSZKa\n1U4P4mLgpcCPM3MEeA7VRfYkScewdgJi8rDnfYU2SdIxpp2A+BjwUeDkiHg11RVXP1JrVZKkxrVz\nNde3U90W9PvAk4BrMvPfaq1KktS4dgJid2auBT5bdzGSpM7RzhDTDyPi/NZF+yRJx4l2ehBnAf8B\nEBEzbdOZ2VNTTZKkDtDOmdSDi1GIJKmztHM/iDcC07OapoFHgG9n5o66CpMkNaudOYhfpjoxbg+w\nl+pEuXXA+oh4a32lSZKa1E5APBVYl5mbM/MGqiWvp2bmC4HfrrU6SVJj2gmIx1Jdg2lGH9Ud5gC6\njnpFkqSO0M4qpncBd0bEZ6gu9/08YHNEvArwst+SdIyatweRmZuBlwAPAPcDL87MdwM7gMtqrU6S\n1Jh2ehAAT6O61egmqst+fysz762tKklS47wfhCSpyPtBSJKKvB+EJKnI+0FIkoramaTeAfwA+CXg\nWXg/CEk6LhwxICLiccB24NeAe6mGlS4EHhMRd2TmnsUpUZLUhLmGmN4F3AGclpnnZOY5wGnA14F3\nLkZxkqTmzDXEtCYzXzK7ITMnIuJq4Gv1liVJatpcAfFIqTEzpyKi0VVMX/rK13nwR6NNlsDU5CHO\nf+ZZjdYgSXVq90zqjvK9B/ey79DKRmvYN/xAo+8vSXWbKyB+NSK+d4Rtj6+jGElS55grIJ6yaFVI\nkjrOEQMiM+//WV44Ik4E3g/8ItXZ19cC3wa2AVPA3cCGzJyOiPXA5cAh4FpvZSpJzWvnTOpH64+A\nocw8n+rOczcC1wMbW21dwKURsQq4EjiP6rpPmyKit8a6JEltqHOS+mNUJ9pBFUQHgbWZeVurbSfw\nXKoT8HZl5kHgYETcB6wB7qyxNknSPGoLiMzcDxAR/VRh8Qbg7bN2GQNWAgPA3kK7JKlBtS5zjYgn\nAR8HbszMj0TEW2dtHgD2AKNA/6z2fmBkvtdesaLvaJa6YNM/7mVwsH/+HXVc8tjQbN3dEyxf3tvo\n99bU5MJH7msLiIg4DbgZ+IvM/EKr+asRcUFm3kp1T4lbgN3AdRHRBywDzqCawJ7T/v0H6im8TfvH\nJxgaGmu0BnWmwcF+jw39hOHhMcbHJ+juae57a3x8YsE/U2cPYiPVUNE1EXFNq+0qYHNrEvoeYHtr\nFdNmqsuId1NNYi/8k0iSjqo65yCuogqEw60r7LsV2FpXLZKkhatzmaskaQkzICRJRUvyYn1Sp5qc\nnOThhx9meLjZSeqVKx9LT09PozVo6TMgpKNo79497Phi0tW9rLEaxveN8rsXPY2TTz6lsRp0bDAg\npKNsxUkDdPcsb7oM6WfmHIQkqciAkCQVGRCSpCIDQpJUZEBIkooMCElSkQEhSSoyICRJRQaEJKnI\ngJAkFRkQkqQiA0KSVGRASJKKDAhJUpEBIUkqMiAkSUUGhCSpyICQJBUZEJKkIgNCklRkQEiSigwI\nSVKRASFJKjIgJElFBoQkqciAkCQVGRCSpCIDQpJUZEBIkooMCElSkQEhSSo6oe43iIhzgLdk5rMj\n4nRgGzAF3A1syMzpiFgPXA4cAq7NzB111yVJmlutPYiIeC2wBehrNb0D2JiZ5wNdwKURsQq4EjgP\nuBjYFBG9ddYlSZpf3UNM9wEvogoDgLWZeVvr8U7gIuBsYFdmHszM0dbPrKm5LknSPGoNiMz8ONWw\n0YyuWY/HgJXAALC30C5JalDtcxCHmZr1eADYA4wC/bPa+4GR+V5oxYq++Xap1fSPexkc7J9/Rx1X\nursngAcbPT6nJns59dR+TjnF47NTdHdPsHx5b+PHxUItdkB8NSIuyMxbgUuAW4DdwHUR0QcsA86g\nmsCe0/79B2otdN73H59gaGis0RrUeYaHq2OiyeNzfHyChx4aY2rKqbxOMTw8xvj4BN09zR4XC7VY\nATHd+v81wJbWJPQ9wPbWKqbNwO1UQ14bM3Phn0SSdFTVHhCZeT/VCiUy815gXWGfrcDWumuRJLXP\nE+UkSUUGhCSpyICQJBUZEJKkIgNCklRkQEiSigwISVKRASFJKjIgJElFBoQkqciAkCQVGRCSpCID\nQpJUZEBIkooMCElSkQEhSSoyICRJRQaEJKnIgJAkFRkQkqQiA0KSVGRASJKKDAhJUpEBIUkqMiAk\nSUUGhCSpyICQJBUZEJKkIgNCklRkQEiSigwISVKRASFJKjIgJElFBoQkqciAkCQVGRCSpCIDQpJU\ndELTBcyIiG7g3cAa4ADwisz8brNVSdLxq5N6EC8EejPzPOB1wPUN1yNJx7VOCojfAD4LkJlfBs5q\nthxJOr51zBATMACMzno+GRHdmTl1+I6HDh7iwMQji1dZwcGDEwwPP9xoDeo8IyMj7N83Slf3RGM1\njO8bZWRkpLH3108bGRlhfN/o/DvW6NG8fycFxCjQP+t5MRwAXvriC7sWpyRpoVZz7rlN16DOs5pz\nz31G00UsWCcNMe0CngcQEecC32i2HEk6vnVSD+ITwHMiYlfr+WVNFiNJx7uu6enppmuQJHWgThpi\nkiR1EANCklRkQEiSipZMQETEq5quQZKOJ0tikjoi+oG7gH/NzNc3XY80W0R0US3RPjEzP9l0PdKM\n1rG5FiAz71rozy+VHsTZwBCwOiLe33Qx0ozWL+CnqALiTRHx3oZLkoD/OzY/DVxJdWy+cqGvsVQC\nIoEbgZcBfRGxpeF6pBlXAQ9l5gaqP2RWRsTKhmuSAF4JDGfmy4H3Ao+LiKcu5AWWREBk5g+AT2bm\nBNUv5IkR8ZGGy5IA/gd4ICKWA6cApwFeCkad4H5guPX4hcClwAci4mPtvsCSCAiAzBxv/f8Q8Fpg\nNCJ+vtmqJG4Hbpo5PqnmIfZExB9HxF82WZiOe3cAb2o9/nRmrs3MZwI9EXFaOy+wJCapSyKiJzMn\nm65DmhERA8DfA7cArwauzMxvNVuVVImIE4HnAH8DXJqZ817edckGhNRpIuKJwPeBLwMvy8zvNFyS\nBEBEvAR4AbAKuKrdP1w66WJ90lI3DPwLcE1m3tt0MdIsO4EvAYcy84F2f8gehHQURURvazGFtOQZ\nEJKkoiWzikmStLgMCElSkQEhSSoyICRJRQaEJKnIgJAkFf0v3vqQRHMkYZQAAAAASUVORK5CYII=\n", + "text": [ + "" + ] + } + ], + "prompt_number": 670 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "train_data['traveler'] = 1\n", + "train_data['Gender'] = train_data[\"Sex\"].map(lambda x: \"1\" if x == 'male' else 0).astype(int)" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 675 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "travelers = len(train_data.index)\n", + "survivors = train_data[train_data.Survived == 1].sum()\n", + "surv_rate = round((survivors.Survived / travelers)*100,0)\n", + "print(\"The overall survival rate was {} %\".format(surv_rate))" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "The overall survival rate was 38.0 %\n" + ] + } + ], + "prompt_number": 676 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "final_data = train_data[['Pclass','Survived','traveler', 'Gender']]\n", + "final_data.head()" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PclassSurvivedtravelerGender
0 3 0 1 1
1 1 1 1 0
2 3 1 1 0
3 1 1 1 0
4 3 0 1 1
\n", + "
" + ], + "metadata": {}, + "output_type": "pyout", + "prompt_number": 680, + "text": [ + " Pclass Survived traveler Gender\n", + "0 3 0 1 1\n", + "1 1 1 1 0\n", + "2 3 1 1 0\n", + "3 1 1 1 0\n", + "4 3 0 1 1" + ] + } + ], + "prompt_number": 680 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "final_data_g = final_data.groupby(['Pclass','Gender','Survived']).sum() \n", + "final_data_g['surv_rate'] = (final_data_g['traveler'] / travelers) * 100\n", + "del final_data_g['traveler']\n", + "final_data_g" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
surv_rate
PclassGenderSurvived
100 0.336700
1 10.213244
10 8.641975
1 5.050505
200 0.673401
1 7.856341
10 10.213244
1 1.907969
300 8.080808
1 8.080808
10 33.670034
1 5.274972
\n", + "
" + ], + "metadata": {}, + "output_type": "pyout", + "prompt_number": 682, + "text": [ + " surv_rate\n", + "Pclass Gender Survived \n", + "1 0 0 0.336700\n", + " 1 10.213244\n", + " 1 0 8.641975\n", + " 1 5.050505\n", + "2 0 0 0.673401\n", + " 1 7.856341\n", + " 1 0 10.213244\n", + " 1 1.907969\n", + "3 0 0 8.080808\n", + " 1 8.080808\n", + " 1 0 33.670034\n", + " 1 5.274972" + ] + } + ], + "prompt_number": 682 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "final_data_g.reset_index(inplace=True)\n", + "final_data_g = final_data_g[final_data_g['Survived'] == 1]\n", + "final_data_g" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
PclassGenderSurvivedsurv_rate
1 1 0 1 10.213244
3 1 1 1 5.050505
5 2 0 1 7.856341
7 2 1 1 1.907969
9 3 0 1 8.080808
11 3 1 1 5.274972
\n", + "
" + ], + "metadata": {}, + "output_type": "pyout", + "prompt_number": 684, + "text": [ + " Pclass Gender Survived surv_rate\n", + "1 1 0 1 10.213244\n", + "3 1 1 1 5.050505\n", + "5 2 0 1 7.856341\n", + "7 2 1 1 1.907969\n", + "9 3 0 1 8.080808\n", + "11 3 1 1 5.274972" + ] + } + ], + "prompt_number": 684 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "test = pd.read_csv(\"test.csv\")" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 685 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "test[\"Survived\"] = 0\n", + "test.loc[(test[\"Pclass\"] == 1) & (test[\"Sex\"] == \"female\"), \"Survived\"] = 1\n", + "test.loc[(test[\"Pclass\"] == 3) & (test[\"Sex\"] == \"female\"), \"Survived\"] = 1\n", + "\n", + "test = test[[\"PassengerId\", \"Survived\"]]\n", + "test.to_csv(\"genderclassmodel.csv\", index=False)" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 686 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [], + "language": "python", + "metadata": {}, + "outputs": [] + } + ], + "metadata": {} + } + ] +} \ No newline at end of file