diff --git a/spam.ipynb b/spam.ipynb new file mode 100644 index 0000000..e998c63 --- /dev/null +++ b/spam.ipynb @@ -0,0 +1,924 @@ +{ + "metadata": { + "name": "", + "signature": "sha256:87246e0b638c75117130cc2dcee08ee41768dd2111d3f025edc934e53bfbb81c" + }, + "nbformat": 3, + "nbformat_minor": 0, + "worksheets": [ + { + "cells": [ + { + "cell_type": "code", + "collapsed": false, + "input": [ + "from sklearn.cross_validation import train_test_split\n", + "from sklearn.feature_selection import VarianceThreshold\n", + "from sklearn import cluster\n", + "from sklearn.neighbors import KNeighborsClassifier\n", + "from sklearn.naive_bayes import GaussianNB\n", + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn import metrics\n", + "from sklearn.cross_validation import cross_val_score\n", + "import pandas as pd\n", + "import numpy as np\n", + "from sklearn import tree\n", + "from sklearn.tree import DecisionTreeClassifier, export_graphviz\n", + "from sklearn import decomposition\n", + "from sklearn.externals.six import StringIO " + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 151 + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Read the pandas data frame and get the values out as a numpy array" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "spam_file = pd.read_csv(\"spambase.data\", header=None)\n", + "spam=spam_file.values" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 131 + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Get the last column out: Response Variable" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "spam_target = spam[0::,-1]\n", + "spam_target.shape" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 144, + "text": [ + "array([ 1., 1., 1., ..., 0., 0., 0.])" + ] + } + ], + "prompt_number": 144 + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Get all columns but the last as a numpy array of predictors" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "spam_data = spam[0::,:-1:]\n", + "spam_data.shape" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 39, + "text": [ + "(4601, 57)" + ] + } + ], + "prompt_number": 39 + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Split the dataset into train and test sets, keeping predictors and response separate" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "x_train, x_test, y_train, y_test = train_test_split(spam_data, spam_target, test_size=0.4, random_state=0)" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 40 + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Gaussian Naive Bayes: 80%" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "clf = GaussianNB()\n", + "clf = clf.fit(x_train, y_train)" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 41 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "predicted = clf.predict(x_test)" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 42 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "print(metrics.classification_report(y_test, 
predicted))\n", + "print(metrics.confusion_matrix(y_test, predicted))\n", + "print(metrics.f1_score(y_test, predicted))\n", + "scores = cross_val_score(clf, spam_data, spam_target, cv=5)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0.0 0.94 0.73 0.83 1097\n", + " 1.0 0.70 0.93 0.80 744\n", + "\n", + "avg / total 0.85 0.82 0.82 1841\n", + "\n", + "[[806 291]\n", + " [ 49 695]]\n", + "0.803468208092\n" + ] + } + ], + "prompt_number": 44 + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Decision Tree: 87%" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "clf = tree.DecisionTreeClassifier()" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 147 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "clf = clf.fit(x_train, y_train)\n", + "predicted = clf.predict(x_test)" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 148 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "print(metrics.classification_report(y_test, predicted))\n", + "print(metrics.confusion_matrix(y_test, predicted))\n", + "print(metrics.f1_score(y_test, predicted))\n", + "scores = cross_val_score(clf, spam_data, spam_target, cv=5)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0.0 0.92 0.91 0.91 1097\n", + " 1.0 0.87 0.88 0.88 744\n", + "\n", + "avg / total 0.90 0.90 0.90 1841\n", + "\n", + "[[999 98]\n", + " [ 88 656]]\n", + "0.875834445928\n" + ] + } + ], + "prompt_number": 149 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "\n", + "with open(\"spam_tree.dot\", 'w') as f:\n", + " f = export_graphviz(clf, out_file=f)" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 152 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "!dot -Tpdf spam_tree.dot -o spam_tree.pdf" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 154 + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Logistic Regression: 89%" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "clf = LogisticRegression()" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 52 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "clf = clf.fit(x_train, y_train)\n", + "predicted = clf.predict(x_test)" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 53 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "print(metrics.classification_report(y_test, predicted))\n", + "print(metrics.confusion_matrix(y_test, predicted))\n", + "print(metrics.f1_score(y_test, predicted))\n", + "scores = cross_val_score(clf, spam_data, spam_target, cv=5)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0.0 0.92 0.95 0.93 1097\n", + " 1.0 0.92 0.87 0.89 744\n", + "\n", + "avg / total 0.92 0.92 0.91 1841\n", + "\n", + "[[1037 60]\n", + " [ 96 648]]\n", + "0.892561983471\n" + ] + } + ], + "prompt_number": 54 + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + 
"Nearest Neighbors: 68%" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "clf = KNeighborsClassifier(n_neighbors=2)" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 68 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "clf = clf.fit(x_train, y_train)\n", + "predicted = clf.predict(x_test)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + "[ 1. 0. 0. ..., 1. 1. 0.]\n" + ] + } + ], + "prompt_number": 75 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "print(metrics.classification_report(y_test, predicted))\n", + "print(metrics.confusion_matrix(y_test, predicted))\n", + "print(metrics.f1_score(y_test, predicted))\n", + "scores = cross_val_score(clf, spam_data, spam_target, cv=5)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0.0 0.76 0.94 0.84 1097\n", + " 1.0 0.86 0.56 0.68 744\n", + "\n", + "avg / total 0.80 0.79 0.77 1841\n", + "\n", + "[[1026 71]\n", + " [ 324 420]]\n", + "0.68016194332\n" + ] + } + ], + "prompt_number": 70 + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Dimensionality Reduction: PCA" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "pca = decomposition.RandomizedPCA(n_components=50, whiten=True)\n", + "pca.fit(x_train, y_train)\n", + "x_train_pca = pca.transform(x_train)\n", + "x_test_pca = pca.transform(x_test)\n", + "selected_vectors_trainx = x_train_pca[::,0:10:]\n", + "selected_vectors_testx = x_test_pca[::,0:10:]\n", + "#print(pca.explained_variance_ratio_)" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 158 + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Logistic Regression - Reduction through Principal Component Analysis 80%" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "clf = LogisticRegression()\n", + "\n", + "clf = clf.fit(selected_vectors_trainx, y_train)\n", + "predicted = clf.predict(selected_vectors_testx)\n", + "\n", + "print(metrics.classification_report(y_test, predicted))\n", + "print(metrics.confusion_matrix(y_test, predicted))\n", + "print(metrics.f1_score(y_test, predicted))\n", + "scores = cross_val_score(clf, spam_data, spam_target, cv=5)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0.0 0.86 0.90 0.88 1097\n", + " 1.0 0.84 0.78 0.81 744\n", + "\n", + "avg / total 0.85 0.85 0.85 1841\n", + "\n", + "[[990 107]\n", + " [166 578]]\n", + "0.808957312806\n" + ] + } + ], + "prompt_number": 159 + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Reduction through Variance Threshold" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "sel = VarianceThreshold()\n", + "x_train_var = sel.fit_transform(x_train)\n", + "x_test_var = sel.fit_transform(x_test)" + ], + "language": "python", + "metadata": {}, + "outputs": [], + "prompt_number": 111 + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Logistic Regression - Reduction through Variance Threshold 89%" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "clf = LogisticRegression()\n", + "\n", + "clf = 
clf.fit(x_train_var, y_train)\n", + "predicted = clf.predict(x_test_var)\n", + "\n", + "print(metrics.classification_report(y_test, predicted))\n", + "print(metrics.confusion_matrix(y_test, predicted))\n", + "print(metrics.f1_score(y_test, predicted))\n", + "scores = cross_val_score(clf, spam_data, spam_target, cv=5)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0.0 0.91 0.94 0.93 1097\n", + " 1.0 0.91 0.87 0.89 744\n", + "\n", + "avg / total 0.91 0.91 0.91 1841\n", + "\n", + "[[1036 61]\n", + " [ 97 647]]\n", + "0.891184573003\n" + ] + } + ], + "prompt_number": 112 + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Reduction through Variable Selection" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "#Word Frequency\n", + "spam_data_words = spam[0::,0:48:]\n", + "spam_data_words.shape" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 117, + "text": [ + "(4601, 48)" + ] + } + ], + "prompt_number": 117 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "#char Frequency\n", + "spam_data_chars = spam[0::,48:54:]\n", + "spam_data_chars.shape" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 121, + "text": [ + "(4601, 6)" + ] + } + ], + "prompt_number": 121 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "#word + chars freqs\n", + "spam_data_words_chars = spam[0::,0:54:]\n", + "spam_data_words_chars.shape" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 132, + "text": [ + "(4601, 54)" + ] + } + ], + "prompt_number": 132 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "#word + chars freqs + cap avg\n", + "spam_data_words_chars_cap = spam[0::,0:55:]\n", + "spam_data_words_chars_cap.shape" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 136, + "text": [ + "(4601, 55)" + ] + } + ], + "prompt_number": 136 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "#capitals average\n", + "spam_data_cap_avg = spam[0::,54:55:]\n", + "spam_data_cap_avg.shape" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 122, + "text": [ + "(4601, 1)" + ] + } + ], + "prompt_number": 122 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "#capital longest\n", + "spam_data_cap_longest = spam[0::,55:56:]\n", + "spam_data_cap_longest.shape" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 123, + "text": [ + "(4601, 1)" + ] + } + ], + "prompt_number": 123 + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "#capital total\n", + "spam_data_cap_total = spam[0::,56:57:]\n", + "spam_data_cap_total.shape" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "metadata": {}, + "output_type": "pyout", + "prompt_number": 124, + "text": [ + "(4601, 1)" + ] + } + ], + "prompt_number": 124 + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Logistic Regression with Word and Char Frequencies: 88%" + ] + }, + { + 
"cell_type": "code", + "collapsed": false, + "input": [ + "x_train, x_test, y_train, y_test = train_test_split(spam_data_words_chars, spam_target, test_size=0.4, random_state=0)\n", + "clf = LogisticRegression()\n", + "\n", + "clf = clf.fit(x_train, y_train)\n", + "predicted = clf.predict(x_test)\n", + "\n", + "print(metrics.classification_report(y_test, predicted))\n", + "print(metrics.confusion_matrix(y_test, predicted))\n", + "print(metrics.f1_score(y_test, predicted))\n", + "scores = cross_val_score(clf, spam_data_words_chars, spam_target, cv=5)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0.0 0.91 0.94 0.93 1097\n", + " 1.0 0.91 0.87 0.89 744\n", + "\n", + "avg / total 0.91 0.91 0.91 1841\n", + "\n", + "[[1032 65]\n", + " [ 100 644]]\n", + "0.88644184446\n" + ] + } + ], + "prompt_number": 138 + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Logistic Regression with Word Frequencies: 87%" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "x_train, x_test, y_train, y_test = train_test_split(spam_data_words, spam_target, test_size=0.4, random_state=0)\n", + "clf = LogisticRegression()\n", + "\n", + "clf = clf.fit(x_train, y_train)\n", + "predicted = clf.predict(x_test)\n", + "\n", + "print(metrics.classification_report(y_test, predicted))\n", + "print(metrics.confusion_matrix(y_test, predicted))\n", + "print(metrics.f1_score(y_test, predicted))\n", + "scores = cross_val_score(clf, spam_data_words, spam_target, cv=5)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0.0 0.90 0.93 0.92 1097\n", + " 1.0 0.90 0.85 0.88 744\n", + "\n", + "avg / total 0.90 0.90 0.90 1841\n", + "\n", + "[[1025 72]\n", + " [ 108 636]]\n", + "0.876033057851\n" + ] + } + ], + "prompt_number": 140 + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Logistic Regression with Char Frequencies: 72%" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "x_train, x_test, y_train, y_test = train_test_split(spam_data_chars, spam_target, test_size=0.4, random_state=0)\n", + "clf = LogisticRegression()\n", + "\n", + "clf = clf.fit(x_train, y_train)\n", + "predicted = clf.predict(x_test)\n", + "\n", + "print(metrics.classification_report(y_test, predicted))\n", + "print(metrics.confusion_matrix(y_test, predicted))\n", + "print(metrics.f1_score(y_test, predicted))\n", + "scores = cross_val_score(clf, spam_data_chars, spam_target, cv=5)" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0.0 0.78 0.95 0.86 1097\n", + " 1.0 0.90 0.60 0.72 744\n", + "\n", + "avg / total 0.83 0.81 0.80 1841\n", + "\n", + "[[1047 50]\n", + " [ 297 447]]\n", + "0.720386784851\n" + ] + } + ], + "prompt_number": 141 + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Logistic Regression with Word and Char Frequencies as well as capital average: 88%" + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [ + "x_train, x_test, y_train, y_test = train_test_split(spam_data_words_chars_cap, spam_target, test_size=0.4, random_state=0)\n", + "clf = LogisticRegression()\n", + "\n", + "clf = clf.fit(x_train, 
y_train)\n", + "predicted = clf.predict(x_test)\n", + "\n", + "print(metrics.classification_report(y_test, predicted))\n", + "print(metrics.confusion_matrix(y_test, predicted))\n", + "print(metrics.f1_score(y_test, predicted))\n", + "scores = cross_val_score(clf, spam_data_words_chars_cap, spam_target, cv=5)\n", + "\n" + ], + "language": "python", + "metadata": {}, + "outputs": [ + { + "output_type": "stream", + "stream": "stdout", + "text": [ + " precision recall f1-score support\n", + "\n", + " 0.0 0.91 0.94 0.93 1097\n", + " 1.0 0.91 0.87 0.89 744\n", + "\n", + "avg / total 0.91 0.91 0.91 1841\n", + "\n", + "[[1034 63]\n", + " [ 98 646]]\n", + "0.889194769443\n" + ] + } + ], + "prompt_number": 142 + }, + { + "cell_type": "heading", + "level": 2, + "metadata": {}, + "source": [ + "Conclusion" + ] + }, + { + "cell_type": "heading", + "level": 6, + "metadata": {}, + "source": [ + "The best performance was obtained by running a logistic regression on all variables. 89.5% success ratio. Results:\n", + "\n", + "Log Regression: 89%\n", + "Gaussian: 80%\n", + "Decision Tree: 87%\n", + "Nearest Neighbors: 67%\n", + "\n", + "Different feature reduction methods were also tested: principal components, variance threshold and different subsets of variables but there were no major improvements. It is important to notice that the char frequencies don't bring much to the model. More of the data variability is explained by the word frequencies only (87%). If 87% was an acceptable level of success, it would be better to use the model with just the word frequencies." + ] + }, + { + "cell_type": "code", + "collapsed": false, + "input": [], + "language": "python", + "metadata": {}, + "outputs": [] + } + ], + "metadata": {} + } + ] +} \ No newline at end of file