diff --git a/.gitignore b/.gitignore
index d8eff1c..a33edc5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -110,4 +110,6 @@ com_crashlytics_export_strings.xml
 crashlytics.properties
 crashlytics-build.properties
 
-atusdata/
+# Ignore the training-data folder and macOS Finder metadata.
+guess_lang/data/
+.DS_Store
diff --git a/Using_guess_lang.ipynb b/Using_guess_lang.ipynb
new file mode 100644
index 0000000..cfb7d23
--- /dev/null
+++ b/Using_guess_lang.ipynb
@@ -0,0 +1,279 @@
+{
+ "metadata": {
+  "name": "",
+  "signature": "sha256:4c467298348c6ff6bcbe571ee98f7c99f30feb8c4bd3e3d7d4f67a4b00a8a79e"
+ },
+ "nbformat": 3,
+ "nbformat_minor": 0,
+ "worksheets": [
+  {
+   "cells": [
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "import matplotlib.pyplot as plt\n",
+      "import numpy as np\n",
+      "import pandas as pd"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 79
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "import guess_lang"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [],
+     "prompt_number": 80
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "!python guess_lang"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "Extra Trees Random Forest\r\n",
+        "01 :     Clojure \t Correct!\r\n"
+       ]
+      },
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "02 :     Clojure \t Correct!\r\n",
+        "03 :        Ruby \t Incorrect: Clojure\r\n"
+       ]
+      },
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "04 :     Clojure \t Correct!\r\n",
+        "05 :        Java \t Incorrect: Python\r\n"
+       ]
+      },
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "06 :      Python \t Correct!\r\n",
+        "07 :        Ruby \t Incorrect: Python\r\n"
+       ]
+      },
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "08 :      Python \t Correct!\r\n"
+       ]
+      },
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "09 :  Javascript \t Correct!\r\n",
+        "10 :  Javascript \t Correct!\r\n"
+       ]
+      },
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "11 :  Javascript \t Correct!\r\n",
+        "12 :  Javascript \t Correct!\r\n"
+       ]
+      },
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "13 :        Ruby \t Correct!\r\n"
+       ]
+      },
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "14 :        Ruby \t Correct!\r\n",
+        "15 :        Ruby \t Correct!\r\n"
+       ]
+      },
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "16 :     Haskell \t Correct!\r\n",
+        "17 :     Haskell \t Correct!\r\n"
+       ]
+      },
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "18 :     Haskell \t Correct!\r\n",
+        "19 :      Scheme \t Correct!\r\n"
+       ]
+      },
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "20 :      Scheme \t Correct!\r\n",
+        "21 :      Scheme \t Correct!\r\n"
+       ]
+      },
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "22 :        Java \t Correct!\r\n",
+        "23 :        Java \t Correct!\r\n"
+       ]
+      },
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "24 :       Scala \t Correct!\r\n",
+        "25 :       Scala \t Correct!\r\n"
+       ]
+      },
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "26 :         Tcl \t Correct!\r\n"
+       ]
+      },
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "27 :         Tcl \t Correct!\r\n",
+        "28 :         Php \t Correct!\r\n"
+       ]
+      },
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "29 :         Php \t Correct!\r\n",
+        "30 :         Php \t Correct!\r\n"
+       ]
+      },
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "31 :       Ocaml \t Correct!\r\n",
+        "32 :       Ocaml \t Correct!\r\n",
+        "Score: 0.90625\r\n"
+       ]
+      }
+     ],
+     "prompt_number": 81
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "!python guess_lang other_tests/multi_table.java"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "Extra Trees Random Forest\r\n"
+       ]
+      },
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "1 :  Java\r\n"
+       ]
+      }
+     ],
+     "prompt_number": 85
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "!python guess_lang other_tests/scanner.py"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "Extra Trees Random Forest\r\n"
+       ]
+      },
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "1 :  Python\r\n"
+       ]
+      }
+     ],
+     "prompt_number": 86
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [
+      "!python guess_lang other_tests/methods.rb"
+     ],
+     "language": "python",
+     "metadata": {},
+     "outputs": [
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "Extra Trees Random Forest\r\n"
+       ]
+      },
+      {
+       "output_type": "stream",
+       "stream": "stdout",
+       "text": [
+        "1 :  Ruby\r\n"
+       ]
+      }
+     ],
+     "prompt_number": 87
+    },
+    {
+     "cell_type": "code",
+     "collapsed": false,
+     "input": [],
+     "language": "python",
+     "metadata": {},
+     "outputs": []
+    }
+   ],
+   "metadata": {}
+  }
+ ]
+}
\ No newline at end of file
diff --git a/classifier.data b/classifier.data
new file mode 100644
index 0000000..0061e61
Binary files /dev/null and b/classifier.data differ
diff --git a/guess_lang/.DS_Store b/guess_lang/.DS_Store
new file mode 100644
index 0000000..5008ddf
Binary files /dev/null and b/guess_lang/.DS_Store differ
diff --git a/guess_lang/__init__.py b/guess_lang/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/guess_lang/__main__.py b/guess_lang/__main__.py
new file mode 100644
index 0000000..f702b17
--- /dev/null
+++ b/guess_lang/__main__.py
@@ -0,0 +1,143 @@
+import sys
+import os
+import pickle
+import traverse_folders as tf
+from learner import Learner
+from classifier import Classifier
+from sklearn.cluster import KMeans
+
+
+class Language_Guesser:
+    """ Unused placeholder; the script below does all the work. """
+    def __init__(self):
+        pass
+
+if __name__ == '__main__':
+    training_path = "guess_lang/data/"
+    testing_path = "guess_lang/test/"
+    single_file = False
+    use_pickle = True
+
+    # Command line: [FILE] [-n].  FILE classifies a single file instead of
+    # the bundled test suite; -n ignores (and later rewrites) the pickles.
+    # Only argv[1] and argv[2] are inspected, exactly as before.
+    cli_args = sys.argv[1:3]
+    if cli_args:
+        if cli_args[0] == '-n':
+            use_pickle = False
+        else:
+            testing_path = cli_args[0]
+            single_file = True
+            # An explicit length check replaces the old bare `except: pass`.
+            use_pickle = len(cli_args) < 2 or cli_args[1] != '-n'
+
+    # Reuse the pickled learner when allowed, otherwise rebuild the feature
+    # DataFrame from every recognised source file under training_path.
+    # NOTE: pickle.load executes code from the file; only load trusted data.
+    learner_path = "learner.data"
+    if os.path.isfile(learner_path) and use_pickle:
+        with open(learner_path, 'rb') as learner_file:
+            learner = pickle.load(learner_file)
+    else:
+        # Collect (path, language) pairs for the training files.
+        training_set = tf.build_train_set(training_path)
+        learner = Learner()
+        # One DataFrame row of feature ratios per training file.
+        for code_path, language in training_set:
+            learner.train(code_path, language)
+
+    # Same for the classifier: reuse the pickle if allowed, else create a
+    # fresh one that fits lazily on the learner's training DataFrame.
+    classifier_path = "classifier.data"
+    if os.path.isfile(classifier_path) and use_pickle:
+        # NOTE: as above, unpickling is only safe for trusted files.
+        with open(classifier_path, 'rb') as classifier_file:
+            classifier = pickle.load(classifier_file)
+    else:
+        classifier = Classifier(learner.training_df)
+
+    # (test_number, path) pairs, ordered by test number.  A single
+    # user-supplied file comes back from build_test_set as test number 1.
+    testing_set = sorted(tf.build_test_set(testing_path), key=lambda x: x[0])
+    # Expected answers are only meaningful for the bundled test suite.
+    answers = {} if single_file else tf.get_answers("guess_lang/test.csv")
+
+    # print("Decision Tree")
+    # correct = 0
+    # for test_number,test in testing_set:
+    #     analysis = learner.analyze(test)
+    #     decision = classifier.decision_tree(analysis)[0].lower()
+    #     if decision == (answers[test_number]):
+    #         correct += 1
+    #     if single_file:
+    #         print(test_number, ": ", decision)
+    #     else:
+    #         print(test_number, ": ", decision,
+    #               "\tCorrect: ", answers[test_number])
+    # print("Score: {}".format(correct/32))
+
+    # print("Random Forest")
+    # correct = 0
+    # for test_number, test in testing_set:
+    #     analysis = learner.analyze(test)
+    #     decision = classifier.random_forest(analysis)[0].lower()
+    #     if decision == (answers[test_number]):
+    #         correct += 1
+    #     if single_file:
+    #         print(test_number, ": ", decision)
+    #     else:
+    #         print(test_number, ": ", decision,
+    #               "\tCorrect: ", answers[test_number])
+    # if not single_file:
+    #     print("Score: {}".format(correct/32))
+
+    print("Extra Trees Random Forest")
+    correct = 0
+    for test_number, test in testing_set:
+        analysis = learner.analyze(test)
+        decision = classifier.extreme_random_forest(analysis)[0].lower()
+        if single_file:
+            print(test_number, ": ", decision.title())
+            continue
+        if decision == answers[test_number]:
+            correct_string = "Correct!"
+            correct += 1
+        else:
+            correct_string = "Incorrect: {}".format(
+                answers[test_number].title())
+        print(str(test_number).zfill(2), ": ",
+              decision.title().rjust(10), "\t", correct_string)
+    # Score against the real number of tests instead of a hard-coded 32.
+    if testing_set and not single_file:
+        print("Score: {}".format(correct/len(testing_set)))
+
+    # print("Linear SVC")
+    # correct = 0
+    # for test_number,test in testing_set:
+    #     analysis = learner.analyze(test)
+    #     decision = classifier.linear_svc(analysis)[0].lower()
+    #     if decision == (answers[test_number]):
+    #         correct += 1
+    #     #print(test_number, ": ", decision)
+    # print("Score: {}".format(correct/32))
+
+    # print("Cluster")
+    # correct = 0
+    # for test_number,test in testing_set:
+    #     analysis = learner.analyze(test)
+    #     decision = classifier.cluster(analysis)[0]
+    #     if decision == (answers[test_number]):
+    #         correct += 1
+    #     #print(test_number, ": ", decision)
+    # print("Score: {}".format(correct/32))
+
+    # Persist the learner and classifier when no pickle exists yet, or when
+    # -n forced a rebuild; protocol=2 keeps the existing file format.
+    # `with` guarantees the files are closed even if pickling fails.
+    if not os.path.isfile(learner_path) or not use_pickle:
+        with open(learner_path, 'wb') as output:
+            pickle.dump(learner, output, protocol=2)
+    # Dumped after predicting, so the fitted estimator is saved as well.
+    if not os.path.isfile(classifier_path) or not use_pickle:
+        with open(classifier_path, 'wb') as output:
+            pickle.dump(classifier, output, protocol=2)
diff --git a/guess_lang/classifier.py b/guess_lang/classifier.py
new file mode 100644
index 0000000..306b913
--- /dev/null
+++ b/guess_lang/classifier.py
@@ -0,0 +1,101 @@
+import matplotlib.pyplot as plt
+import pandas as pd
+import numpy as np
+import re
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.ensemble import ExtraTreesClassifier
+from sklearn.svm import LinearSVC
+from sklearn.cluster import KMeans
+
+LANGUAGES = ['Clojure', 'Haskell', 'Java', 'JavaScript', 'OCaml',
+             'Perl', 'PHP', 'Python', 'Ruby', 'Scala', 'Scheme', 'Tcl']
+
+
+class Classifier:
+    """ Lazily fitted scikit-learn models over a training DataFrame.
+
+    Every column of training_df except the last is used as a feature and
+    the last column as the class.  Fitted estimators are stored on the
+    instance so that pickling the instance also saves the fits.
+
+    Attributes:
+        training_df -- labelled rows every estimator is fitted on
+        testing_df  -- empty DataFrame; within this class only __str__
+                       reads it
+        dt_fit, rf_fit, et_fit, lsvc_fit, cl_fit -- fitted estimators,
+                       None until the matching method is first called
+    """
+
+    def __init__(self, training_df):
+        """ Store the training data; nothing is fitted until first use. """
+        self.training_df = training_df
+        self.testing_df = pd.DataFrame()
+        self.dt_fit = None
+        self.rf_fit = None
+        self.et_fit = None
+        self.lsvc_fit = None
+        self.cl_fit = None
+
+    def __str__(self):
+        return str(self.testing_df)
+
+    # Each public method below works the same way: build its estimator on
+    # first use, fit it on the training data, keep the fit on the instance
+    # and return the prediction for the given testframe.
+
+    def _predict(self, attribute, make_estimator, testframe):
+        """ Fit the estimator cached in `attribute` if needed, then predict.
+
+        attribute      -- name of the instance attribute caching the fit
+        make_estimator -- zero-argument callable returning an unfitted model
+        testframe      -- feature row(s) to classify
+        Returns whatever the fitted estimator's predict() returns. """
+        # Explicit None test: an estimator's truthiness is not a reliable
+        # "already fitted" flag.
+        if getattr(self, attribute) is None:
+            # .iloc is the positional equivalent of the removed .ix indexer.
+            features = self.training_df.iloc[:, :-1]
+            classes = self.training_df.iloc[:, -1]
+            setattr(self, attribute, make_estimator().fit(features, classes))
+        return getattr(self, attribute).predict(testframe)
+
+    def decision_tree(self, testframe):
+        """ Predict the class of testframe with a decision tree. """
+        return self._predict('dt_fit', DecisionTreeClassifier, testframe)
+
+    def random_forest(self, testframe):
+        """ Predict the class of testframe with a 15-tree random forest. """
+        def build():
+            return RandomForestClassifier(n_estimators=15,
+                                          criterion='gini',
+                                          max_features=None)
+        return self._predict('rf_fit', build, testframe)
+
+    def extreme_random_forest(self, testframe):
+        """ Predict the class of testframe with 15 extra-trees estimators. """
+        def build():
+            return ExtraTreesClassifier(n_estimators=15,
+                                        criterion='gini',
+                                        max_features=None)
+        return self._predict('et_fit', build, testframe)
+
+    def linear_svc(self, testframe):
+        """ Predict the class of testframe with a linear SVM. """
+        # NOTE(review): loss='l1' is kept as written; newer scikit-learn
+        # releases may require 'hinge' instead - confirm before upgrading.
+        return self._predict('lsvc_fit', lambda: LinearSVC(loss='l1'),
+                             testframe)
+
+    def cluster(self, testframe):
+        """ Assign testframe to one of 12 KMeans clusters.
+
+        Clustering is unsupervised, so the result is a cluster index, not a
+        language name; the idea was to run each cluster through a
+        supervised algorithm afterwards to actually identify it.
+        The fit is now cached in cl_fit, and a fitting error propagates
+        instead of being swallowed (which then raised a NameError). """
+        # KMeans.fit accepts the labels _predict passes but ignores them.
+        prediction = self._predict('cl_fit', lambda: KMeans(12), testframe)
+        print(prediction)
+        return prediction
diff --git a/guess_lang/data/.DS_Store b/guess_lang/data/.DS_Store
new file mode 100644
index 0000000..34c5e8d
Binary files /dev/null and b/guess_lang/data/.DS_Store differ
diff --git a/guess_lang/learner.py b/guess_lang/learner.py
new file mode 100644
index 0000000..91633f1
--- /dev/null
+++ b/guess_lang/learner.py
@@ -0,0 +1,80 @@
+import matplotlib.pyplot as plt
+import pandas as pd
+import numpy as np
+import re
+
+
+class Learner:
+    """  A learner builds a dataframe of features to train on.
+    Each row is one piece of code.  Each feature column holds the weighted
+    ratio of one regular expression in that code, and the "class" column
+    (added by train) holds the language label.  """
+
+    def __init__(self):
+        self.training_df = pd.DataFrame()
+        # Feature regexes.  Fixed: a missing comma fused r'<?php' with
+        # r'type', and the unescaped r'|[^|]' matched the empty string.
+        self.features = [r'\$[\D]', r'[^;];[^.]', r';;[^;]', r';;;',
+                         r'include', r'let', r'{[^-]', r'{-',
+                         r'import', r'var', r'@', r'#', r'=>', r'js\.', r'/\*',
+                         r'->', r'\(\*', r'\|[^|]', r'& args', r'<?php',
+                         r'type', r'final', r'"""', r'<!', r'my', r'::',
+                         r'__name__', r'defn ', r'def ',
+                         r'__init__', r'=begin', r'puts', r'===', r'clojure\.',
+                         r'[^/]\*', r'haskell', r'__str__',
+                         r'\(function[ ]?\(', r'\*[\w]']
+        # One empty column per feature, titled by get_ratio(snip=...).
+        for column in self.features:
+            self.training_df[self.get_ratio(snip=column)] = \
+                pd.Series(index=self.training_df.index, dtype=float)
+
+    def __str__(self):
+        return str(self.training_df)
+
+    def train(self, code_path, language):
+        """ Add the file at code_path as a new row labelled with language.
+        The row index is the number of rows already present.  A file that
+        cannot be read is reported and skipped; previously the method then
+        crashed with a NameError because `code` was never assigned. """
+        code_count = len(self.training_df.index)
+        try:
+            with open(code_path) as code_file:
+                code = code_file.read()
+        except (OSError, UnicodeDecodeError):
+            print("ERROR in training: {}".format(code_path))
+            print("{} files read successfully".format(code_count))
+            return
+        self.training_df.loc[code_count, "class"] = language
+        for feature in self.features:
+            column, value = self.get_ratio(code=code, snip=feature)
+            self.training_df.loc[code_count, column] = value
+
+    def analyze(self, code_path):
+        """ Return a one-row DataFrame of feature ratios for code_path.
+        An unreadable file is reported and the error re-raised, instead of
+        the NameError the missing `code` used to cause. """
+        try:
+            with open(code_path) as code_file:
+                code = code_file.read()
+        except (OSError, UnicodeDecodeError):
+            print("ERROR in testing: {}".format(code_path))
+            raise
+        analysis = pd.DataFrame()
+        for feature in self.features:
+            column, value = self.get_ratio(code=code, snip=feature)
+            analysis.loc[0, column] = value
+        return analysis
+
+    def get_ratio(self, code=None, snip=None):
+        """ Return the column title for snip, or (title, weighted ratio).
+        Without code only the title "<snip>_ratio" is returned.  Otherwise
+        the regex matches are counted (re.MULTILINE) and the value is
+        len(snip) * count * 10 / len(code); empty code gives 0.0. """
+        title = "{}_ratio".format(snip)
+        if code is None:
+            return title
+        if not code:
+            return (title, 0.0)
+        count = sum(1 for _ in re.finditer(snip, code, re.MULTILINE))
+        # len(snip) weights longer patterns more; *10 only rescales.
+        return (title, (len(snip)*count*10)/len(code))
diff --git a/guess_lang/pytests/test_features.py b/guess_lang/pytests/test_features.py
new file mode 100644
index 0000000..a1ac8a9
--- /dev/null
+++ b/guess_lang/pytests/test_features.py
@@ -0,0 +1,15 @@
+from guess_lang.learner import Learner
+
+def test_let_ratio():
+    """ One 'let' in nine characters: get_ratio returns a (title, value)
+    tuple whose value is len('let') * 1 match * 10 / len(code). """
+    code = "  let x=0"
+    learner = Learner()
+    name, value = learner.get_ratio(code, "let")
+    assert round(value, 5) == round(3 * 1 * 10 / 9, 5)
+
+def test_get_ratio():
+    """ One 7-character match in 20 characters, scaled by 10, gives 3.5. """
+    learner = Learner()
+    name, value = learner.get_ratio("include some package", "include")
+    assert (name, round(value, 5)) == ("include_ratio", 3.5)
diff --git a/test.csv b/guess_lang/test.csv
similarity index 100%
rename from test.csv
rename to guess_lang/test.csv
diff --git a/test/1 b/guess_lang/test/1
similarity index 100%
rename from test/1
rename to guess_lang/test/1
diff --git a/test/10 b/guess_lang/test/10
similarity index 100%
rename from test/10
rename to guess_lang/test/10
diff --git a/test/11 b/guess_lang/test/11
similarity index 100%
rename from test/11
rename to guess_lang/test/11
diff --git a/test/12 b/guess_lang/test/12
similarity index 100%
rename from test/12
rename to guess_lang/test/12
diff --git a/test/13 b/guess_lang/test/13
similarity index 100%
rename from test/13
rename to guess_lang/test/13
diff --git a/test/14 b/guess_lang/test/14
similarity index 100%
rename from test/14
rename to guess_lang/test/14
diff --git a/test/15 b/guess_lang/test/15
similarity index 100%
rename from test/15
rename to guess_lang/test/15
diff --git a/test/16 b/guess_lang/test/16
similarity index 100%
rename from test/16
rename to guess_lang/test/16
diff --git a/test/17 b/guess_lang/test/17
similarity index 100%
rename from test/17
rename to guess_lang/test/17
diff --git a/test/18 b/guess_lang/test/18
similarity index 100%
rename from test/18
rename to guess_lang/test/18
diff --git a/test/19 b/guess_lang/test/19
similarity index 100%
rename from test/19
rename to guess_lang/test/19
diff --git a/test/2 b/guess_lang/test/2
similarity index 100%
rename from test/2
rename to guess_lang/test/2
diff --git a/test/20 b/guess_lang/test/20
similarity index 100%
rename from test/20
rename to guess_lang/test/20
diff --git a/test/21 b/guess_lang/test/21
similarity index 100%
rename from test/21
rename to guess_lang/test/21
diff --git a/test/22 b/guess_lang/test/22
similarity index 100%
rename from test/22
rename to guess_lang/test/22
diff --git a/test/23 b/guess_lang/test/23
similarity index 100%
rename from test/23
rename to guess_lang/test/23
diff --git a/test/24 b/guess_lang/test/24
similarity index 100%
rename from test/24
rename to guess_lang/test/24
diff --git a/test/25 b/guess_lang/test/25
similarity index 100%
rename from test/25
rename to guess_lang/test/25
diff --git a/test/26 b/guess_lang/test/26
similarity index 100%
rename from test/26
rename to guess_lang/test/26
diff --git a/test/27 b/guess_lang/test/27
similarity index 100%
rename from test/27
rename to guess_lang/test/27
diff --git a/test/28 b/guess_lang/test/28
similarity index 100%
rename from test/28
rename to guess_lang/test/28
diff --git a/test/29 b/guess_lang/test/29
similarity index 100%
rename from test/29
rename to guess_lang/test/29
diff --git a/test/3 b/guess_lang/test/3
similarity index 100%
rename from test/3
rename to guess_lang/test/3
diff --git a/test/30 b/guess_lang/test/30
similarity index 100%
rename from test/30
rename to guess_lang/test/30
diff --git a/test/31 b/guess_lang/test/31
similarity index 100%
rename from test/31
rename to guess_lang/test/31
diff --git a/test/32 b/guess_lang/test/32
similarity index 100%
rename from test/32
rename to guess_lang/test/32
diff --git a/test/4 b/guess_lang/test/4
similarity index 100%
rename from test/4
rename to guess_lang/test/4
diff --git a/test/5 b/guess_lang/test/5
similarity index 100%
rename from test/5
rename to guess_lang/test/5
diff --git a/test/6 b/guess_lang/test/6
similarity index 100%
rename from test/6
rename to guess_lang/test/6
diff --git a/test/7 b/guess_lang/test/7
similarity index 100%
rename from test/7
rename to guess_lang/test/7
diff --git a/test/8 b/guess_lang/test/8
similarity index 100%
rename from test/8
rename to guess_lang/test/8
diff --git a/test/9 b/guess_lang/test/9
similarity index 100%
rename from test/9
rename to guess_lang/test/9
diff --git a/guess_lang/traverse_folders.py b/guess_lang/traverse_folders.py
new file mode 100644
index 0000000..ce69c06
--- /dev/null
+++ b/guess_lang/traverse_folders.py
@@ -0,0 +1,80 @@
+import os
+import re
+import pandas as pd
+from bs4 import BeautifulSoup
+
+""" We want to recognize the following languages:
+    Clojure, Haskell, Java, JavaScript,
+    OCaml, Perl, PHP, Python,
+    Ruby, Scala, Scheme, Tcl """
+VALID_EXTENSIONS = ['.clojure', '.hs', '.lhs', '.ghc', '.java',
+                    '.javascript', '.js', '.class', '.jar', '.ocaml', '.pl',
+                    '.pm', '.t', '.pod', '.perl', '.php',
+                    '.phtml', '.php3', '.php4', '.php5', '.phps',
+                    '.py', '.pyw', '.pyc', '.pyo', '.pyd',
+                    '.python', '.python2', '.python3', '.rb', '.rbw',
+                    '.ruby', '.jruby', '.scala', '.scm', '.ss',
+                    '.tcl']  # flat list of every accepted extension
+# Language name -> its extensions (the same extensions as the list above).
+LANGUAGE_DICT = {'Clojure': ['.clojure'],
+                 'Haskell': ['.hs', '.lhs', '.ghc'],
+                 'Java': ['.java', '.class', '.jar'],
+                 'JavaScript': ['.js', '.javascript'],
+                 'OCaml': ['.ocaml'],
+                 'Perl': ['.pl', '.pm', '.t', '.pod', '.perl'],
+                 'PHP': ['.php', '.phtml', '.php3', '.php4', '.php5', '.phps'],
+                 'Python': ['.py', '.pyw', '.pyc', '.pyo', '.pyd',
+                            '.python', '.python2', '.python3'],
+                 'Ruby': ['.rb', '.rbw', '.ruby', '.jruby'],
+                 'Scala': ['.scala'],
+                 'Scheme': ['.scm', '.ss'],
+                 'Tcl': ['.tcl']}
+
+
+def build_train_set(folder_path):
+    """ Walk folder_path and return a list of (filepath, language) tuples,
+    one for every file whose extension is listed in VALID_EXTENSIONS.
+    The extension now comes from os.path.splitext; the old regex raised
+    AttributeError on names it could not match (e.g. one-letter names). """
+    training_code = []
+    for directory, subdirs, files in os.walk(folder_path):
+        for file in files:
+            extension = os.path.splitext(file)[1]
+            if extension not in VALID_EXTENSIONS:
+                continue
+            # Linear scan of a small dict: brevity over speed, as before.
+            language = "unknown"
+            for key, values in LANGUAGE_DICT.items():
+                if extension in values:
+                    language = key
+                    break
+            training_code.append((os.path.join(directory, file), language))
+    return training_code
+
+
+def build_test_set(folder_path):
+    """ Return a list of (test_number, filepath) tuples.  A single file
+    yields [(1, folder_path)]; in a folder every file whose name is a
+    decimal number is a test; other files (e.g. .DS_Store) are skipped. """
+    if os.path.isfile(folder_path):
+        return [(1, folder_path)]
+    testing_code = []
+    for directory, subdirs, files in os.walk(folder_path):
+        for file in files:
+            if file.isdecimal():
+                # join() also works when directory has no trailing slash.
+                testing_code.append((int(file), os.path.join(directory, file)))
+    return testing_code
+
+
+def get_answers(answer_path):
+    """ Load the answer CSV and map each "Filename" to its "Language". """
+    answer_table = pd.read_csv(answer_path)
+    by_filename = answer_table.set_index(answer_table["Filename"])
+    return by_filename["Language"].to_dict()
+
+
+if __name__ == '__main__':
+    # Manual check of build_train_set(); the old call used a misspelled
+    # name (build_training_set) and omitted the required folder argument.
+    print(build_train_set("guess_lang/data/"))
diff --git a/learner.data b/learner.data
new file mode 100644
index 0000000..4c30e90
Binary files /dev/null and b/learner.data differ
diff --git a/other_tests/jSON_Walk.py b/other_tests/jSON_Walk.py
new file mode 100644
index 0000000..c9797ca
--- /dev/null
+++ b/other_tests/jSON_Walk.py
@@ -0,0 +1,50 @@
+__author__ = 'jasonaylward'
+
+import os
+
+#Create a jSON file that we'll write to
+#or overwrite the existing file
+jSONFile = open("plateContents.json", "w")
+
+#Open the jSON Object
+jSONString = "{"
+
+#Get the names of each plate directory.
+#These will be used as keys for each jSON pair
+entries = os.listdir(".")
+plates = [];
+for entry in entries:
+    if os.path.isdir(entry) and entry[:1] != ".":
+        plates.append( entry)
+
+
+
+for plate in plates:
+    # plateString = "key":[ ...
+    plateString = "\""+plate+"\":["
+    for dir, subdirs, files in os.walk(plate):
+        dir = dir[len(plate):]
+        for file in files:
+            #if the file is a .png image files, add to the array.
+            if ".png" in file:
+                fileString = "\"" + dir + "/" + file + "\","
+                plateString = plateString + fileString
+    #if string ends in a ',' then remove before adding ']'
+    if plateString[-1:] == ",":
+        plateString = plateString[:-1]
+
+    #close plateString so it is "key":[...]
+    plateString = plateString + "],"
+    jSONString = jSONString + plateString
+
+
+# delete the last comma in the list of files
+if jSONString[-1:] == ",":
+    jSONString = jSONString[:-1]
+
+#Close the jSON Object
+jSONString = jSONString+"}"
+
+#print jSONString
+jSONFile.write(jSONString)
+jSONFile.close()
\ No newline at end of file
diff --git a/other_tests/methods.rb b/other_tests/methods.rb
new file mode 100644
index 0000000..2d03302
--- /dev/null
+++ b/other_tests/methods.rb
@@ -0,0 +1,21 @@
+# Add the strings before and after around each parm and print
+def surround(before, after, *items)
+    items.each { |x| print before, x, after, "\n" }
+end
+
+surround('[', ']', 'this', 'that', 'the other')
+print "\n"
+
+surround('<', '>', 'Snakes', 'Turtles', 'Snails', 'Salamanders', 'Slugs',
+        'Newts')
+print "\n"
+
+def boffo(a, b, c, d)
+    print "a = #{a} b = #{b}, c = #{c}, d = #{d}\n"
+end
+
+# Use * to adapt between arrays and arguments
+a1 = ['snack', 'fast', 'junk', 'pizza']
+a2 = [4, 9]
+boffo(*a1)
+boffo(17, 3, *a2)
diff --git a/other_tests/multi_table.java b/other_tests/multi_table.java
new file mode 100644
index 0000000..68d8f6a
--- /dev/null
+++ b/other_tests/multi_table.java
@@ -0,0 +1,16 @@
+import java.util.Scanner;
+
+class MultiplicationTable
+{
+   public static void main(String args[])
+   {
+      int n, c;
+      System.out.println("Enter an integer to print it's multiplication table");
+      Scanner in = new Scanner(System.in);
+      n = in.nextInt();
+      System.out.println("Multiplication table of "+n+" is :-");
+
+      for ( c = 1 ; c <= 10 ; c++ )
+         System.out.println(n+"*"+c+" = "+(n*c));
+   }
+}
diff --git a/other_tests/scanner.py b/other_tests/scanner.py
new file mode 100644
index 0000000..fa37fa1
--- /dev/null
+++ b/other_tests/scanner.py
@@ -0,0 +1,24 @@
+import re
+import sys
+import urllib2
+import BeautifulSoup
+
+usage = "Run the script: ./geolocate.py IPAddress"
+
+if len(sys.argv)!=2:
+    print(usage)
+    sys.exit(0)
+
+if len(sys.argv) > 1:
+    ipaddr = sys.argv[1]
+
+geody = "http://www.geody.com/geoip.php?ip=" + ipaddr
+html_page = urllib2.urlopen(geody).read()
+soup = BeautifulSoup.BeautifulSoup(html_page)
+
+# Filter paragraph containing geolocation info.
+paragraph = soup('p')[3]
+
+# Remove html tags using regex.
+geo_txt = re.sub(r'<.*?>', '', str(paragraph))
+print geo_txt[32:].strip()
diff --git a/requirements.txt b/requirements.txt
index 9170871..ec229bb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,3 @@
 scikit-learn
+Pandas
 textblob
\ No newline at end of file