diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..36b46cf --- /dev/null +++ b/.gitignore @@ -0,0 +1,19 @@ +*.py[cod] +__pycache__/ +__pycache__ + +# OSX +.DS_Store + +# Eclipse +.project +.pydevproject +/.settings + +# Do not commit data files +/data/*.csv +/models/*.pkl +/models/*.npy + +# generally do not commit node_modules (controversial...) +/node_modules \ No newline at end of file diff --git a/README.md b/README.md index 956a501..6a0dd17 100755 --- a/README.md +++ b/README.md @@ -12,7 +12,8 @@ The Armchair Analysis data is *not free*. It costs $49 (one-time) to gain access data from the 2000-2014 NFL seasons. There is also a professional package that will provide weekly updates. Without the Armchair Analysis data, you will not be able to use much of this code. -This code currently requires Python 2.7 and is not Python 3 compliant to our knowledge. Questions about the Python code can be directed to [Trey Causey](mailto:trey@thespread.us). +This code currently requires Python 2.7 or Python 3.4+. +Questions about the Python code can be directed to [Trey Causey](mailto:trey@thespread.us). Please note that none of the file operations are supported on Windows. @@ -27,6 +28,10 @@ NOTE: If you are unable to purchase the Armchair Analysis data, [Ben Dilday](htt - pandas - scikit-learn +## Node.js package requirements + +- underscore + ## Usage Unzip the play-by-play data into a directory. 
Run the following code from the directory diff --git a/bot.py b/bot.py index ef75128..c830db3 100755 --- a/bot.py +++ b/bot.py @@ -8,6 +8,12 @@ import winprob as wp +try: + raw_input +except NameError: + raw_input = input # python3 + +import sys def load_data(): click.echo('Loading data and setting up model.') @@ -27,9 +33,14 @@ def load_data(): return data, model def fg_make_prob(situation): - args = ' '.join("--%s=%r" % (key,val) for (key,val) in situation.iteritems()) + if sys.version_info[0] >= 3: + args = ' '.join("--%s=%r" % (key,val) for (key,val) in situation.items()) + else: + args = ' '.join("--%s=%r" % (key,val) for (key,val) in situation.iteritems()) model_fg = muterun_js('model-fg/model-fg.js', args) - return model_fg.stdout.split()[-1] + stdoutResults = model_fg.stdout + stdoutSplit = stdoutResults.split() + return stdoutSplit[-1] @click.command() def run_bot(): diff --git a/data_prep.py b/data_prep.py index 6d2b9ba..abec6b6 100755 --- a/data_prep.py +++ b/data_prep.py @@ -1,7 +1,7 @@ from __future__ import division, print_function import os - +import sys import click import numpy as np import pandas as pd @@ -68,8 +68,9 @@ def load_pbp(pbp_data_fname, games, remove_knees=False): pbp = pbp[pbp.qtr <= 4] # pid 183134 should have a value of 0 for min, but has "0:00" - pbp['min'] = pbp['min'].replace({'0:00': 0}) - pbp['min'] = pbp['min'].astype(np.int64) + if sys.version_info[0] == 2: + pbp['min'] = pbp['min'].replace({'0:00': 0}) + pbp['min'] = pbp['min'].astype(np.int64) # Restrict to regular season games after 2000 pbp = pbp[pbp.gid.isin(games.index)] diff --git a/winprob.py b/winprob.py index 26e7d13..3cfbddc 100755 --- a/winprob.py +++ b/winprob.py @@ -4,6 +4,8 @@ import random import sys +from sklearn.utils.validation import NotFittedError + from collections import OrderedDict import plays as p @@ -126,7 +128,12 @@ def generate_win_probabilities(situation, scenarios, model, data, **kwargs): # Note there is more information in situation than just 
model features. feature_vec = [val for key, val in situation.items() if key in features] - feature_vec = data['scaler'].transform(feature_vec) + try: + feature_vec = data['scaler'].transform(feature_vec) + except NotFittedError: + raise Exception("Sklearn reports that the instance is not yet fitted. " + + "This usually means that the version of python used to train " + + "the model is different from the version you are currently running.") probs['pre_play_wp'] = model.predict_proba(feature_vec)[0][1]