diff --git a/include/albatross/Distribution b/include/albatross/Distribution
index b5407986..595e184c 100644
--- a/include/albatross/Distribution
+++ b/include/albatross/Distribution
@@ -16,6 +16,9 @@
 #include "Common"
 
 #include
+
+#include "utils/AsyncUtils"
+
 #include
 #include
diff --git a/include/albatross/GP b/include/albatross/GP
index c93cd1be..41d89a5e 100644
--- a/include/albatross/GP
+++ b/include/albatross/GP
@@ -20,6 +20,9 @@
 #include
 #include
+
+#include "utils/AsyncUtils"
+
 #include
 #include
 #include
diff --git a/include/albatross/src/eigen/serializable_ldlt.hpp b/include/albatross/src/eigen/serializable_ldlt.hpp
index fb7396b9..5595585d 100644
--- a/include/albatross/src/eigen/serializable_ldlt.hpp
+++ b/include/albatross/src/eigen/serializable_ldlt.hpp
@@ -130,7 +130,8 @@ class SerializableLDLT : public LDLT {
   }
 
   std::vector<Eigen::MatrixXd>
-  inverse_blocks(const std::vector<std::vector<Eigen::Index>> &blocks) const {
+  inverse_blocks(const std::vector<std::vector<Eigen::Index>> &blocks,
+                 ThreadPool *pool = nullptr) const {
    /*
     * The LDLT decomposition is stored such that,
     *
@@ -158,15 +159,14 @@
     ALBATROSS_ASSERT(!inverse_cholesky.hasNaN());
 
-    std::vector<Eigen::MatrixXd> output;
-    for (const auto &block_indices : blocks) {
-      Eigen::MatrixXd sub_matrix =
-          albatross::subset_cols(inverse_cholesky, block_indices);
-      Eigen::MatrixXd one_block =
-          sub_matrix.transpose().lazyProduct(sub_matrix);
-      output.push_back(one_block);
-    }
-    return output;
+    return albatross::apply(
+        blocks,
+        [&](const auto &block_indices) -> Eigen::MatrixXd {
+          Eigen::MatrixXd sub_matrix =
+              albatross::subset_cols(inverse_cholesky, block_indices);
+          return sub_matrix.transpose().lazyProduct(sub_matrix);
+        },
+        pool);
   }
 
   /*
diff --git a/include/albatross/src/evaluation/cross_validation_utils.hpp b/include/albatross/src/evaluation/cross_validation_utils.hpp
index 71e9afe1..6a4860f0 100644
--- a/include/albatross/src/evaluation/cross_validation_utils.hpp
+++ b/include/albatross/src/evaluation/cross_validation_utils.hpp
@@ -195,40 +195,47 @@ held_out_prediction(const Eigen::MatrixXd &inverse_block,
 }
 
 template <typename GroupKey, typename PredictType>
-inline std::map<GroupKey, PredictType>
-held_out_predictions(const Eigen::SerializableLDLT &covariance,
-                     const Eigen::VectorXd &target_mean,
-                     const Eigen::VectorXd &information,
-                     const GroupIndexer<GroupKey> &group_indexer,
-                     PredictTypeIdentity<PredictType> predict_type) {
-
-  const std::vector<GroupIndices> indices = map_values(group_indexer);
-  const std::vector<GroupKey> group_keys = map_keys(group_indexer);
-  const auto inverse_blocks = covariance.inverse_blocks(indices);
-
-  std::map<GroupKey, PredictType> output;
-  for (std::size_t i = 0; i < inverse_blocks.size(); i++) {
-    const Eigen::VectorXd yi = subset(target_mean, indices[i]);
-    const Eigen::VectorXd vi = subset(information, indices[i]);
-    output[group_keys[i]] =
-        held_out_prediction(inverse_blocks[i], yi, vi, predict_type);
-  }
-  return output;
+inline std::map<GroupKey, PredictType> held_out_predictions(
+    const Eigen::SerializableLDLT &covariance,
+    const Eigen::VectorXd &target_mean, const Eigen::VectorXd &information,
+    const GroupIndexer<GroupKey> &group_indexer,
+    PredictTypeIdentity<PredictType> predict_type, ThreadPool *pool) {
+  // This happens outside the threaded loop so that the internal solve
+  // happens only once (and potentially uses Eigen-internal
+  // threading).
+  const auto inverse_blocks =
+      covariance.inverse_blocks(map_values(group_indexer), pool);
+
+  std::map<GroupKey, Eigen::MatrixXd> blocks;
+  std::transform(group_indexer.begin(), group_indexer.end(),
+                 inverse_blocks.begin(), std::inserter(blocks, blocks.end()),
+                 [](const auto &idx_kv, const auto &value) {
+                   return std::make_pair(idx_kv.first, value);
+                 });
+
+  return apply(
+             group_indexer,
+             [&](const auto &key, const auto &indices) {
+               return held_out_prediction(
+                   blocks[key], subset(target_mean, indices),
+                   subset(information, indices), predict_type);
+             },
+             pool)
+      .get_map();
 }
 
 template <typename GroupKey, typename PredictType>
-inline std::map<GroupKey, PredictType>
-leave_one_group_out_conditional(const JointDistribution &prior,
-                                const MarginalDistribution &truth,
-                                const GroupIndexer<GroupKey> &group_indexer,
-                                PredictTypeIdentity<PredictType> predict_type) {
+inline std::map<GroupKey, PredictType> leave_one_group_out_conditional(
+    const JointDistribution &prior, const MarginalDistribution &truth,
+    const GroupIndexer<GroupKey> &group_indexer,
+    PredictTypeIdentity<PredictType> predict_type, ThreadPool *pool) {
   Eigen::MatrixXd covariance = prior.covariance;
   covariance += truth.covariance;
   Eigen::SerializableLDLT ldlt(covariance);
   const Eigen::VectorXd deviation = truth.mean - prior.mean;
   const Eigen::VectorXd information = ldlt.solve(deviation);
   return held_out_predictions(covariance, truth.mean, information,
-                              group_indexer, predict_type);
+                              group_indexer, predict_type, pool);
 }
 
 } // namespace details
@@ -237,27 +244,33 @@
 template <typename GroupKey>
 inline std::map<GroupKey, Eigen::VectorXd>
 leave_one_group_out_conditional_means(
     const JointDistribution &prior, const MarginalDistribution &truth,
-    const GroupIndexer<GroupKey> &group_indexer) {
+    const GroupIndexer<GroupKey> &group_indexer,
+    ThreadPool *pool = serial_thread_pool) {
   return details::leave_one_group_out_conditional(
-      prior, truth, group_indexer, PredictTypeIdentity<Eigen::VectorXd>());
+      prior, truth, group_indexer, PredictTypeIdentity<Eigen::VectorXd>(),
+      pool);
 }
 
 template <typename GroupKey>
 inline std::map<GroupKey, MarginalDistribution>
 leave_one_group_out_conditional_marginals(
     const JointDistribution &prior, const MarginalDistribution &truth,
-    const GroupIndexer<GroupKey> &group_indexer) {
+    const GroupIndexer<GroupKey> &group_indexer,
+    ThreadPool *pool = serial_thread_pool) {
   return details::leave_one_group_out_conditional(
-      prior, truth, group_indexer, PredictTypeIdentity<MarginalDistribution>());
+      prior, truth, group_indexer, PredictTypeIdentity<MarginalDistribution>(),
+      pool);
 }
 
 template <typename GroupKey>
 inline std::map<GroupKey, JointDistribution>
 leave_one_group_out_conditional_joints(
     const JointDistribution &prior, const MarginalDistribution &truth,
-    const GroupIndexer<GroupKey> &group_indexer) {
+    const GroupIndexer<GroupKey> &group_indexer,
+    ThreadPool *pool = serial_thread_pool) {
   return details::leave_one_group_out_conditional(
-      prior, truth, group_indexer, PredictTypeIdentity<JointDistribution>());
+      prior, truth, group_indexer, PredictTypeIdentity<JointDistribution>(),
+      pool);
 }
 
 } // namespace albatross
diff --git a/include/albatross/src/models/gp.hpp b/include/albatross/src/models/gp.hpp
index 204da6dc..7e3a10a1 100644
--- a/include/albatross/src/models/gp.hpp
+++ b/include/albatross/src/models/gp.hpp
@@ -475,9 +475,9 @@ gp_cross_validated_predictions(const RegressionDataset &dataset,
   // have been formed by taking the mean function into account and the
   // held out predictions will use that to derive deltas from the truth
   // so removing the mean, then adding it back later is unneccesary
-  return details::held_out_predictions(gp_fit.train_covariance,
-                                       dataset.targets.mean, gp_fit.information,
-                                       group_indexer, predict_type);
+  return details::held_out_predictions(
+      gp_fit.train_covariance, dataset.targets.mean, gp_fit.information,
+      group_indexer, predict_type, model.threads_.get());
 }
 
 /*
diff --git a/include/albatross/utils/AsyncUtils b/include/albatross/utils/AsyncUtils
index aaedede2..ca4a497c 100644
--- a/include/albatross/utils/AsyncUtils
+++ b/include/albatross/utils/AsyncUtils
@@ -23,6 +23,8 @@
 #endif
 
 #include "../Common"
+#include "../src/indexing/traits.hpp"
+#include "../src/indexing/apply.hpp"
 #include "../src/utils/async_utils.hpp"
 
 #endif
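
Usage note (illustrative, not part of the diff): the sketch below shows the calling convention the new trailing ThreadPool arguments enable. The wrapper function, the std::string group key, and the single umbrella include are assumptions made only to keep the example self-contained; the leave_one_group_out_conditional_means overload, its defaulted pool parameter, and the ThreadPool type come from the change above.

// Sketch only. Assumes <albatross/GP> (touched in this diff) transitively
// provides the cross-validation helpers; an additional umbrella header may
// be needed in practice.
#include <map>
#include <string>

#include <albatross/GP>

// The pool is taken as a parameter here to avoid guessing at how a
// ThreadPool is constructed (or which namespace declares it); a model's own
// pool (model.threads_.get()) or nullptr for the serial path would both be
// reasonable arguments.
std::map<std::string, Eigen::VectorXd>
loo_means_example(const albatross::JointDistribution &prior,
                  const albatross::MarginalDistribution &truth,
                  const albatross::GroupIndexer<std::string> &indexer,
                  ThreadPool *pool) {
  // Omitting the trailing argument keeps the serial default, so existing
  // call sites compile unchanged:
  //   albatross::leave_one_group_out_conditional_means(prior, truth, indexer);

  // With a pool, the per-group inverse blocks and held-out conditionals are
  // dispatched through albatross::apply instead of a serial loop.
  return albatross::leave_one_group_out_conditional_means(prior, truth,
                                                          indexer, pool);
}

Because every new parameter is defaulted (nullptr for inverse_blocks, serial_thread_pool for the leave-one-group-out entry points), existing callers keep the previous single-threaded behaviour without modification.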