-
Notifications
You must be signed in to change notification settings - Fork 24
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
added gaussian process #19
base: master
Are you sure you want to change the base?
Changes from all commits
f068daa
8de1682
7bc517e
184dd3b
315087d
aa7244f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -105,3 +105,6 @@ ENV/ | |
|
||
# Pickles | ||
notebooks/pickle_jar/ | ||
|
||
# IDE | ||
.idea/ |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
api | ||
pymc3_models | ||
============ | ||
|
||
.. toctree:: | ||
:maxdepth: 4 | ||
|
||
pymc3_models.models | ||
pymc3_models |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
pymc3\_models package | ||
===================== | ||
|
||
Subpackages | ||
----------- | ||
|
||
.. toctree:: | ||
|
||
pymc3_models.models | ||
|
||
Submodules | ||
---------- | ||
|
||
pymc3\_models.exc module | ||
------------------------ | ||
|
||
.. automodule:: pymc3_models.exc | ||
:members: | ||
:undoc-members: | ||
:show-inheritance: | ||
|
||
|
||
Module contents | ||
--------------- | ||
|
||
.. automodule:: pymc3_models | ||
:members: | ||
:undoc-members: | ||
:show-inheritance: |
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,9 @@ | ||
__version__ = "1.1.3" | ||
|
||
from pymc3_models.models.HierarchicalLogisticRegression import HierarchicalLogisticRegression | ||
from pymc3_models.models.LinearRegression import LinearRegression | ||
from pymc3_models.models.GaussianProcessRegression import GaussianProcessRegression | ||
from pymc3_models.models.SparseGaussianProcessRegression import SparseGaussianProcessRegression | ||
from pymc3_models.models.StudentsTProcessRegression import StudentsTProcessRegression | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,177 @@ | ||
import numpy as np | ||
import pymc3 as pm | ||
from sklearn.metrics import r2_score | ||
import theano | ||
import theano.tensor as T | ||
|
||
from pymc3_models.exc import PyMC3ModelsError | ||
from pymc3_models.models import BayesianModel | ||
|
||
|
||
class GaussianProcessRegression(BayesianModel): | ||
""" | ||
Gaussian Process Regression built using PyMC3. | ||
""" | ||
|
||
def __init__(self, prior_mean=0.0): | ||
self.ppc = None | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could these properties be alphabetized? |
||
self.gp = None | ||
self.num_training_samples = None | ||
self.num_pred = None | ||
self.prior_mean = prior_mean | ||
|
||
super(GaussianProcessRegression, self).__init__() | ||
|
||
def create_model(self): | ||
""" | ||
Creates and returns the PyMC3 model. | ||
|
||
Note: The size of the shared variables must match the size of the | ||
training data. Otherwise, setting the shared variables later will | ||
raise an error. See http://docs.pymc.io/advanced_theano.html | ||
|
||
Returns | ||
---------- | ||
the PyMC3 model | ||
""" | ||
model_input = theano.shared(np.zeros([self.num_training_samples, self.num_pred])) | ||
|
||
model_output = theano.shared(np.zeros(self.num_training_samples)) | ||
|
||
self.shared_vars = { | ||
'model_input': model_input, | ||
'model_output': model_output, | ||
} | ||
|
||
model = pm.Model() | ||
|
||
with model: | ||
length_scale = pm.Gamma('length_scale', alpha=2, beta=1, shape=(1, self.num_pred)) | ||
signal_variance = pm.HalfCauchy('signal_variance', beta=5, shape=(1)) | ||
noise_variance = pm.HalfCauchy('noise_variance', beta=5, shape=(1)) | ||
|
||
# cov = signal_variance**2 * pm.gp.cov.ExpQuad(1, length_scale) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this line needed? |
||
cov = signal_variance ** 2 * pm.gp.cov.Matern52(1, length_scale) | ||
|
||
# mean_function = pm.gp.mean.Zero() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this comment outdated since you allow the user to specify a prior_mean now? |
||
mean_function = pm.gp.mean.Constant(self.prior_mean) | ||
|
||
self.gp = pm.gp.Latent(mean_func=mean_function, cov_func=cov) | ||
|
||
f = self.gp.prior('f', X=model_input.get_value()) | ||
|
||
y = pm.Normal('y', mu=f, sd=noise_variance, observed=model_output) | ||
|
||
return model | ||
|
||
def fit(self, X, y, inference_type='advi', minibatch_size=None, inference_args=None): | ||
""" | ||
Train the Gaussian Process Regression model | ||
|
||
Parameters | ||
---------- | ||
X : numpy array, shape [n_samples, n_features] | ||
|
||
y : numpy array, shape [n_samples, ] | ||
|
||
inference_type : string, specifies which inference method to call. | ||
Defaults to 'advi'. Currently, only 'advi' and 'nuts' are supported | ||
|
||
minibatch_size : number of samples to include in each minibatch | ||
for ADVI, defaults to None, so minibatch is not run by default | ||
|
||
inference_args : dict, arguments to be passed to the inference methods. | ||
Check the PyMC3 docs for permissible values. If no arguments are | ||
specified, default values will be set. | ||
""" | ||
self.num_training_samples, self.num_pred = X.shape | ||
|
||
self.inference_type = inference_type | ||
|
||
if y.ndim != 1: | ||
y = np.squeeze(y) | ||
|
||
if not inference_args: | ||
inference_args = self._set_default_inference_args() | ||
|
||
if self.cached_model is None: | ||
self.cached_model = self.create_model() | ||
|
||
if minibatch_size: | ||
with self.cached_model: | ||
minibatches = { | ||
self.shared_vars['model_input']: pm.Minibatch(X, batch_size=minibatch_size), | ||
self.shared_vars['model_output']: pm.Minibatch(y, batch_size=minibatch_size), | ||
} | ||
|
||
inference_args['more_replacements'] = minibatches | ||
else: | ||
self._set_shared_vars({'model_input': X, 'model_output': y}) | ||
|
||
self._inference(inference_type, inference_args) | ||
|
||
return self | ||
|
||
def predict(self, X, return_std=False): | ||
""" | ||
Predicts values of new data with a trained Gaussian Process Regression model | ||
|
||
Parameters | ||
---------- | ||
X : numpy array, shape [n_samples, n_features] | ||
|
||
return_std : Boolean flag of whether to return standard deviations with mean values. Defaults to False. | ||
""" | ||
|
||
if self.trace is None: | ||
raise PyMC3ModelsError('Run fit on the model before predict.') | ||
|
||
num_samples = X.shape[0] | ||
|
||
if self.cached_model is None: | ||
self.cached_model = self.create_model() | ||
|
||
self._set_shared_vars({'model_input': X, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I prefer to format longer lines like this:
Could you please change your code to match that style of indenting? |
||
'model_output': np.zeros(num_samples)}) | ||
|
||
with self.cached_model: | ||
f_pred = self.gp.conditional("f_pred", X) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please switch to single quotes to be consistent with the rest of the code. |
||
self.ppc = pm.sample_ppc(self.trace, | ||
vars=[f_pred], | ||
samples=2000) | ||
|
||
if return_std: | ||
return self.ppc['f_pred'].mean(axis=0), self.ppc['f_pred'].std(axis=0) | ||
else: | ||
return self.ppc['f_pred'].mean(axis=0) | ||
|
||
def score(self, X, y): | ||
""" | ||
Scores new data with a trained model. | ||
|
||
Parameters | ||
---------- | ||
X : numpy array, shape [n_samples, n_features] | ||
|
||
y : numpy array, shape [n_samples, ] | ||
""" | ||
|
||
return r2_score(y, self.predict(X)) | ||
|
||
def save(self, file_prefix): | ||
params = { | ||
'inference_type': self.inference_type, | ||
'num_pred': self.num_pred, | ||
'num_training_samples': self.num_training_samples | ||
} | ||
|
||
super(GaussianProcessRegression, self).save(file_prefix, params) | ||
|
||
def load(self, file_prefix): | ||
params = super(GaussianProcessRegression, self).load(file_prefix, | ||
load_custom_params=True) | ||
|
||
self.inference_type = params['inference_type'] | ||
self.num_pred = params['num_pred'] | ||
self.num_training_samples = params['num_training_samples'] | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
import numpy as np | ||
import pymc3 as pm | ||
from sklearn.metrics import r2_score | ||
import theano | ||
import theano.tensor as T | ||
|
||
from pymc3_models.exc import PyMC3ModelsError | ||
from pymc3_models.models.GaussianProcessRegression import GaussianProcessRegression | ||
|
||
|
||
class SparseGaussianProcessRegression(GaussianProcessRegression): | ||
""" | ||
Sparse Gaussian Process Regression model built using PyMC3. | ||
""" | ||
|
||
def __init__(self, prior_mean=0.0): | ||
super(SparseGaussianProcessRegression, self).__init__(prior_mean=prior_mean) | ||
|
||
def create_model(self): | ||
""" | ||
Creates and returns the PyMC3 model. | ||
|
||
Note: The size of the shared variables must match the size of the | ||
training data. Otherwise, setting the shared variables later will | ||
raise an error. See http://docs.pymc.io/advanced_theano.html | ||
|
||
Returns | ||
---------- | ||
the PyMC3 model | ||
""" | ||
model_input = theano.shared(np.zeros([self.num_training_samples, self.num_pred])) | ||
|
||
model_output = theano.shared(np.zeros(self.num_training_samples)) | ||
|
||
self.shared_vars = { | ||
'model_input': model_input, | ||
'model_output': model_output, | ||
} | ||
|
||
self.gp = None | ||
model = pm.Model() | ||
|
||
with model: | ||
length_scale = pm.Gamma('length_scale', alpha=2, beta=1, shape=(1, self.num_pred)) | ||
signal_variance = pm.HalfCauchy('signal_variance', beta=5, shape=(1)) | ||
noise_variance = pm.HalfCauchy('noise_variance', beta=5, shape=(1)) | ||
|
||
# cov_function = signal_variance**2 * pm.gp.cov.ExpQuad(1, length_scale) | ||
cov_function = signal_variance ** 2 * pm.gp.cov.Matern52(1, length_scale) | ||
|
||
# mean_function = pm.gp.mean.Zero() | ||
mean_function = pm.gp.mean.Constant(self.prior_mean) | ||
|
||
self.gp = pm.gp.MarginalSparse(mean_func=mean_function, | ||
cov_func=cov_function, | ||
approx="FITC") | ||
|
||
# initialize 20 inducing points with K-means | ||
# gp.util | ||
Xu = pm.gp.util.kmeans_inducing_points(20, X=model_input.get_value()) | ||
|
||
y = self.gp.marginal_likelihood('y', | ||
X=model_input.get_value(), | ||
Xu=Xu, | ||
y=model_output.get_value(), | ||
noise=noise_variance) | ||
|
||
return model | ||
|
||
def save(self, file_prefix): | ||
params = { | ||
'inference_type': self.inference_type, | ||
'num_pred': self.num_pred, | ||
'num_training_samples': self.num_training_samples | ||
} | ||
|
||
super(SparseGaussianProcessRegression, self).save(file_prefix, params) | ||
|
||
def load(self, file_prefix): | ||
params = super(SparseGaussianProcessRegression, self).load(file_prefix, | ||
load_custom_params=True) | ||
|
||
self.inference_type = params['inference_type'] | ||
self.num_pred = params['num_pred'] | ||
self.num_training_samples = params['num_training_samples'] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I added in a
_version.py
file in master so you can remove this. There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think you might have to modify your notebooks too.