Skip to content

Parallel calculation support in groupwise cross validation #501

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions include/albatross/Distribution
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@
#include "Common"

#include <albatross/src/core/distribution.hpp>

#include "utils/AsyncUtils"

#include <albatross/src/eigen/serializable_ldlt.hpp>
#include <albatross/src/core/transformed_distribution.hpp>

Expand Down
3 changes: 3 additions & 0 deletions include/albatross/GP
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@

#include <albatross/src/evaluation/likelihood.hpp>
#include <albatross/src/evaluation/cross_validation_utils.hpp>

#include "utils/AsyncUtils"

#include <albatross/src/eigen/serializable_ldlt.hpp>
#include <albatross/src/covariance_functions/representations.hpp>
#include <albatross/src/models/gp.hpp>
Expand Down
20 changes: 10 additions & 10 deletions include/albatross/src/eigen/serializable_ldlt.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,8 @@ class SerializableLDLT : public LDLT<MatrixXd, Lower> {
}

std::vector<Eigen::MatrixXd>
inverse_blocks(const std::vector<std::vector<std::size_t>> &blocks) const {
inverse_blocks(const std::vector<std::vector<std::size_t>> &blocks,
ThreadPool *pool = nullptr) const {
/*
* The LDLT decomposition is stored such that,
*
Expand Down Expand Up @@ -158,15 +159,14 @@ class SerializableLDLT : public LDLT<MatrixXd, Lower> {

ALBATROSS_ASSERT(!inverse_cholesky.hasNaN());

std::vector<Eigen::MatrixXd> output;
for (const auto &block_indices : blocks) {
Eigen::MatrixXd sub_matrix =
albatross::subset_cols(inverse_cholesky, block_indices);
Eigen::MatrixXd one_block =
sub_matrix.transpose().lazyProduct(sub_matrix);
output.push_back(one_block);
}
return output;
return albatross::apply(
blocks,
[&](const auto &block_indices) -> Eigen::MatrixXd {
Eigen::MatrixXd sub_matrix =
albatross::subset_cols(inverse_cholesky, block_indices);
return sub_matrix.transpose().lazyProduct(sub_matrix);
},
pool);
}

/*
Expand Down
75 changes: 44 additions & 31 deletions include/albatross/src/evaluation/cross_validation_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -195,40 +195,47 @@ held_out_prediction(const Eigen::MatrixXd &inverse_block,
}

template <typename GroupKey, typename PredictType>
inline std::map<GroupKey, PredictType>
held_out_predictions(const Eigen::SerializableLDLT &covariance,
const Eigen::VectorXd &target_mean,
const Eigen::VectorXd &information,
const GroupIndexer<GroupKey> &group_indexer,
PredictTypeIdentity<PredictType> predict_type) {

const std::vector<GroupIndices> indices = map_values(group_indexer);
const std::vector<GroupKey> group_keys = map_keys(group_indexer);
const auto inverse_blocks = covariance.inverse_blocks(indices);

std::map<GroupKey, PredictType> output;
for (std::size_t i = 0; i < inverse_blocks.size(); i++) {
const Eigen::VectorXd yi = subset(target_mean, indices[i]);
const Eigen::VectorXd vi = subset(information, indices[i]);
output[group_keys[i]] =
held_out_prediction(inverse_blocks[i], yi, vi, predict_type);
}
return output;
inline std::map<GroupKey, PredictType> held_out_predictions(
const Eigen::SerializableLDLT &covariance,
const Eigen::VectorXd &target_mean, const Eigen::VectorXd &information,
const GroupIndexer<GroupKey> &group_indexer,
PredictTypeIdentity<PredictType> predict_type, ThreadPool *pool) {
// This happens outside the threaded loop so that the internal solve
// happens only once (and potentially uses Eigen-internal
// threading).
const auto inverse_blocks =
covariance.inverse_blocks(map_values(group_indexer), pool);

std::map<GroupKey, Eigen::MatrixXd> blocks;
std::transform(group_indexer.begin(), group_indexer.end(),
inverse_blocks.begin(), std::inserter(blocks, blocks.end()),
[](const auto &idx_kv, const auto &value) {
return std::make_pair(idx_kv.first, value);
});

return apply(
group_indexer,
[&](const auto &key, const auto &indices) {
return held_out_prediction(
blocks[key], subset(target_mean, indices),
subset(information, indices), predict_type);
},
pool)
.get_map();
}

template <typename GroupKey, typename PredictType>
inline std::map<GroupKey, PredictType>
leave_one_group_out_conditional(const JointDistribution &prior,
const MarginalDistribution &truth,
const GroupIndexer<GroupKey> &group_indexer,
PredictTypeIdentity<PredictType> predict_type) {
inline std::map<GroupKey, PredictType> leave_one_group_out_conditional(
const JointDistribution &prior, const MarginalDistribution &truth,
const GroupIndexer<GroupKey> &group_indexer,
PredictTypeIdentity<PredictType> predict_type, ThreadPool *pool) {
Eigen::MatrixXd covariance = prior.covariance;
covariance += truth.covariance;
Eigen::SerializableLDLT ldlt(covariance);
const Eigen::VectorXd deviation = truth.mean - prior.mean;
const Eigen::VectorXd information = ldlt.solve(deviation);
return held_out_predictions(covariance, truth.mean, information,
group_indexer, predict_type);
group_indexer, predict_type, pool);
}

} // namespace details
Expand All @@ -237,27 +244,33 @@ template <typename GroupKey>
inline std::map<GroupKey, Eigen::VectorXd>
leave_one_group_out_conditional_means(
const JointDistribution &prior, const MarginalDistribution &truth,
const GroupIndexer<GroupKey> &group_indexer) {
const GroupIndexer<GroupKey> &group_indexer,
ThreadPool *pool = serial_thread_pool) {
return details::leave_one_group_out_conditional(
prior, truth, group_indexer, PredictTypeIdentity<Eigen::VectorXd>());
prior, truth, group_indexer, PredictTypeIdentity<Eigen::VectorXd>(),
pool);
}

template <typename GroupKey>
inline std::map<GroupKey, MarginalDistribution>
leave_one_group_out_conditional_marginals(
const JointDistribution &prior, const MarginalDistribution &truth,
const GroupIndexer<GroupKey> &group_indexer) {
const GroupIndexer<GroupKey> &group_indexer,
ThreadPool *pool = serial_thread_pool) {
return details::leave_one_group_out_conditional(
prior, truth, group_indexer, PredictTypeIdentity<MarginalDistribution>());
prior, truth, group_indexer, PredictTypeIdentity<MarginalDistribution>(),
pool);
}

template <typename GroupKey>
inline std::map<GroupKey, JointDistribution>
leave_one_group_out_conditional_joints(
const JointDistribution &prior, const MarginalDistribution &truth,
const GroupIndexer<GroupKey> &group_indexer) {
const GroupIndexer<GroupKey> &group_indexer,
ThreadPool *pool = serial_thread_pool) {
return details::leave_one_group_out_conditional(
prior, truth, group_indexer, PredictTypeIdentity<JointDistribution>());
prior, truth, group_indexer, PredictTypeIdentity<JointDistribution>(),
pool);
}

} // namespace albatross
Expand Down
6 changes: 3 additions & 3 deletions include/albatross/src/models/gp.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -475,9 +475,9 @@ gp_cross_validated_predictions(const RegressionDataset<FeatureType> &dataset,
// have been formed by taking the mean function into account and the
// held out predictions will use that to derive deltas from the truth
// so removing the mean, then adding it back later is unneccesary
return details::held_out_predictions(gp_fit.train_covariance,
dataset.targets.mean, gp_fit.information,
group_indexer, predict_type);
return details::held_out_predictions(
gp_fit.train_covariance, dataset.targets.mean, gp_fit.information,
group_indexer, predict_type, model.threads_.get());
}

/*
Expand Down
2 changes: 2 additions & 0 deletions include/albatross/utils/AsyncUtils
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
#endif

#include "../Common"
#include "../src/indexing/traits.hpp"
#include "../src/indexing/apply.hpp"
#include "../src/utils/async_utils.hpp"

#endif
Loading