"""
Check out the URLs below to understand how Bayesian regression differs from linear regression:
https://towardsdatascience.com/introduction-to-bayesian-linear-regression-e66e60791ea7
https://dzone.com/articles/bayesian-learning-for-machine-learning-part-ii-lin
"""
from itertools import combinations_with_replacement

import matplotlib.pyplot as plt
import pandas as pd
import torch
from scipy.stats import chi2, multivariate_normal
from sklearn.model_selection import train_test_split

def mean_squared_error(y_true, y_pred):
    """ Returns the mean squared error between y_true and y_pred """
    mse = torch.mean(torch.pow(y_true - y_pred, 2))
    return mse
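
# Quick check of the helper above (a sketch, not run at import time):
#     mean_squared_error(torch.tensor([1., 2.]), torch.tensor([1., 0.]))  # -> tensor(2.)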

def polynomial_features(X, degree):
    """
    Creates polynomial features from an existing set of features. For instance, if
    X_1, X_2, X_3 are the available features, polynomial features are built from
    products of combinations of these features, e.g. X_1*X_2, X_1*X_3, X_2*X_3.

    For degree 2:
    combinations output: [(), (0,), (1,), (2,), (3,), (0, 0), (0, 1), (0, 2), (0, 3),
                          (1, 1), (1, 2), (1, 3), (2, 2), (2, 3), (3, 3)]
    :param X: Input tensor (for the Iris dataset, shape (150, 4))
    :param degree: Polynomial degree, e.g. 2 means a product of at most two feature vectors.
    :return: Output tensor (after adding polynomial features, the number of features increases to 15)
    """
    n_samples, n_features = X.shape[0], X.shape[1]

    def index_combination():
        combinations = [combinations_with_replacement(range(n_features), i) for i in range(0, degree + 1)]
        flat_combinations = [item for sublists in combinations for item in sublists]
        return flat_combinations

    combinations = index_combination()
    n_output_features = len(combinations)
    X_new = torch.empty((n_samples, n_output_features))

    for i, index_combs in enumerate(combinations):
        # list(...) so the empty (bias) combination indexes an empty column set,
        # whose product over dim=1 is a column of ones
        X_new[:, i] = torch.prod(X[:, list(index_combs)], dim=1)

    X_new = X_new.double()
    return X_new

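# A minimal usage sketch (shapes assume a 4-feature input such as Iris); kept as a
# comment so the module has no import-time side effects:
#
#     X_poly = polynomial_features(torch.rand(150, 4, dtype=torch.double), degree=2)
#     X_poly.shape  # torch.Size([150, 15]): 1 bias + 4 linear + 10 degree-2 terms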

class BayesianRegression:
    def __init__(self, n_draws, mu_0, omega_0, nu_0, sigma_sq_0, polynomial_degree=0, credible_interval=0.95):
        """
        Bayesian regression model. If polynomial_degree is specified, the features are
        transformed with a polynomial basis function, which allows for polynomial
        regression. Assumes a Normal prior and likelihood for the weights and a scaled
        inverse chi-squared prior and likelihood for the variance of the weights.

        :param n_draws: The number of simulated draws from the posterior of the parameters.
        :param mu_0: The mean values of the prior Normal distribution of the parameters.
        :param omega_0: The precision matrix of the prior Normal distribution of the parameters.
        :param nu_0: The degrees of freedom of the prior scaled inverse chi-squared distribution.
        :param sigma_sq_0: The scale parameter of the prior scaled inverse chi-squared distribution.
        :param polynomial_degree: The polynomial degree that the features should be transformed to.
                                  Allows for polynomial regression.
        :param credible_interval: The credible interval (ETI in this implementation), given as a
                                  fraction: 0.95 => 95% credible interval of the posterior of the parameters.
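
        A sketch of the conjugate model assumed here (notation matches the parameters above):

            y | beta, sigma^2   ~  Normal(X @ beta, sigma^2 * I)
            beta | sigma^2      ~  Normal(mu_0, sigma^2 * inv(omega_0))
            sigma^2             ~  Scaled-Inv-Chi^2(nu_0, sigma_sq_0)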
        """
        self.n_draws = n_draws
        self.polynomial_degree = polynomial_degree
        self.credible_interval = credible_interval

        # Prior parameters
        self.mu_0 = mu_0
        self.omega_0 = omega_0
        self.nu_0 = nu_0
        self.sigma_sq_0 = sigma_sq_0

    def scaled_inverse_chi_square(self, n, df, scale):
        """
        Allows for simulation from the scaled inverse chi-squared
        distribution. Assumes the variance is distributed according to
        this distribution.
        :param n: Number of samples to draw.
        :param df: Degrees of freedom.
        :param scale: Scale parameter of the distribution.
        :return: Drawn samples of the variance.
        """
        X = chi2.rvs(size=n, df=df)
        sigma_sq = df * scale / X
        return sigma_sq
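
    # A quick sanity check (left as a comment; assumes scipy's chi2 parameterisation):
    # for df > 2 the mean of Scaled-Inv-Chi^2(df, scale) is df * scale / (df - 2), so the
    # average of many draws with df=5, scale=2.0 should be close to 10 / 3 ≈ 3.33.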

    def fit(self, X, y):
        # For polynomial transformation
        if self.polynomial_degree:
            X = polynomial_features(X, degree=self.polynomial_degree)

        n_samples, n_features = X.shape[0], X.shape[1]
        X_T_X = torch.mm(X.T, X)

        # Least-squares estimate of beta
        beta_hat = torch.mm(torch.mm(torch.pinverse(X_T_X), X.T), y)

        # The posterior parameters can be determined analytically since we assume
        # conjugate priors for the likelihoods.
        # Normal prior / likelihood => Normal posterior
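        # Closed-form updates (a sketch matching the code below; ' denotes transpose):
        #   omega_n           = X'X + omega_0
        #   mu_n              = inv(omega_n) @ (X'X @ beta_hat + omega_0 @ mu_0)
        #   nu_n              = nu_0 + n_samples
        #   nu_n * sigma_sq_n = nu_0 * sigma_sq_0 + y'y + mu_0' omega_0 mu_0 - mu_n' omega_n mu_n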
        mu_n = torch.mm(torch.pinverse(X_T_X + self.omega_0),
                        torch.mm(X_T_X, beta_hat) + torch.mm(self.omega_0, self.mu_0.unsqueeze(1)))
        omega_n = X_T_X + self.omega_0
        nu_n = self.nu_0 + n_samples

        # Scaled inverse chi-squared prior / likelihood => Scaled inverse chi-squared posterior
        sigma_sq_n = (1.0 / nu_n) * (self.nu_0 * self.sigma_sq_0
                                     + torch.mm(y.T, y)
                                     + torch.mm(torch.mm(self.mu_0.unsqueeze(1).T, self.omega_0), self.mu_0.unsqueeze(1))
                                     - torch.mm(mu_n.T, torch.mm(omega_n, mu_n)))

        # Simulate parameter values for n_draws
        beta_draws = torch.empty((self.n_draws, n_features))
        for i in range(self.n_draws):
            sigma_sq = self.scaled_inverse_chi_square(n=1, df=nu_n, scale=sigma_sq_n)
            beta = multivariate_normal.rvs(size=1, mean=mu_n[:, 0], cov=sigma_sq * torch.pinverse(omega_n))
            beta_draws[i, :] = torch.tensor(beta, dtype=torch.float)

        # Select the mean of the simulated variables as the ones used to make predictions
        self.w = torch.mean(beta_draws, dim=0, dtype=torch.double)

        # Lower and upper boundary of the credible interval
        l_eti = 0.50 - self.credible_interval / 2
        u_eti = 0.50 + self.credible_interval / 2
        self.eti = torch.tensor([[torch.quantile(beta_draws[:, i], q=l_eti),
                                  torch.quantile(beta_draws[:, i], q=u_eti)]
                                 for i in range(n_features)], dtype=torch.double)

    def predict(self, X, eti=False):
        if self.polynomial_degree:
            X = polynomial_features(X, degree=self.polynomial_degree)
        y_pred = torch.mm(X, self.w.unsqueeze(1))
        # If the lower and upper boundaries for the
        # equal tail interval should be returned
        if eti:
            lower_w = self.eti[:, 0]
            upper_w = self.eti[:, 1]

            y_lower_prediction = torch.mm(X, lower_w.unsqueeze(1))
            y_upper_prediction = torch.mm(X, upper_w.unsqueeze(1))

            return y_pred, y_lower_prediction, y_upper_prediction

        return y_pred

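# The script below assumes 'temp.txt' is a tab-separated file with "time" and "temp"
# columns, where "time" is the fraction of the year (hence the 366 * x scaling when
# plotting) and "temp" is a daily temperature in Celsius.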
if __name__ == '__main__':
    data = pd.read_csv('temp.txt', sep="\t")
    X = torch.tensor(data["time"].values).unsqueeze(1)
    y = torch.tensor(data["temp"].values).unsqueeze(1)
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.4)
    n_samples, n_features = X.shape[0], X.shape[1]

    # Weakly informative prior over the weights and the noise variance
    mu_0 = torch.zeros(n_features, dtype=torch.double)
    omega_0 = torch.diag(torch.tensor([0.0001] * n_features, dtype=torch.double))
    nu_0 = 1
    sigma_sq_0 = 100
    credible_interval = 0.40
    model = BayesianRegression(n_draws=2000,
                               polynomial_degree=4,
                               mu_0=mu_0,
                               omega_0=omega_0,
                               nu_0=nu_0,
                               sigma_sq_0=sigma_sq_0,
                               credible_interval=credible_interval)
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    mse = mean_squared_error(y_test, y_pred)
    y_pred_, y_lower_, y_upper_ = model.predict(X=X, eti=True)
    print("Mean Squared Error:", mse)

    # Color map
    cmap = plt.get_cmap('viridis')

    # Plot the results
    plt.scatter(366 * x_train, y_train, color=cmap(0.9), s=10, label="Training data")
    plt.scatter(366 * x_test, y_test, color=cmap(0.5), s=10, label="Test data")
    plt.plot(366 * X, y_pred_, color="black", linewidth=2, label="Prediction")
    plt.plot(366 * X, y_lower_, color="gray", linewidth=2,
             label="{:.0f}% Credible Interval".format(100 * credible_interval))
    plt.plot(366 * X, y_upper_, color="gray", linewidth=2)
    plt.axis((0, 366, -20, 25))
    plt.suptitle("Bayesian Regression")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celsius')
    plt.legend(loc='lower right')

    plt.show()