Skip to content

[ML] Refactor tests that use CDataGatherer constructor #2845

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 23 commits into from
Apr 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
756 changes: 281 additions & 475 deletions lib/model/unittest/CDetectionRuleTest.cc

Large diffs are not rendered by default.

1,336 changes: 517 additions & 819 deletions lib/model/unittest/CEventRateDataGathererTest.cc

Large diffs are not rendered by default.

391 changes: 191 additions & 200 deletions lib/model/unittest/CEventRatePopulationDataGathererTest.cc

Large diffs are not rendered by default.

453 changes: 216 additions & 237 deletions lib/model/unittest/CHierarchicalResultsTest.cc

Large diffs are not rendered by default.

614 changes: 295 additions & 319 deletions lib/model/unittest/CMetricDataGathererTest.cc

Large diffs are not rendered by default.

543 changes: 254 additions & 289 deletions lib/model/unittest/CMetricPopulationDataGathererTest.cc

Large diffs are not rendered by default.

114 changes: 40 additions & 74 deletions lib/model/unittest/CModelDetailsViewTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@
#include <model/CSearchKey.h>

#include "Mocks.h"
#include "ModelTestHelpers.h"

#include <boost/test/unit_test.hpp>

#include <memory>
#include <ranges>
#include <vector>

BOOST_TEST_DONT_PRINT_LOG_VALUE(ml::model::CModelPlotData::TFeatureStrByFieldDataUMapUMapCItr);
Expand All @@ -35,115 +37,79 @@ BOOST_AUTO_TEST_SUITE(CModelDetailsViewTest)

using namespace ml;

namespace {

const std::string EMPTY_STRING;

} // unnamed

//! Fixture used by the test cases in this suite (see
//! BOOST_FIXTURE_TEST_CASE(testModelPlot, CTestFixture)). It only supplies
//! the resource monitor the test body needs.
class CTestFixture {
protected:
//! Handed to gatherer->addPerson() when the test populates the gatherer.
model::CResourceMonitor m_ResourceMonitor;
};

BOOST_FIXTURE_TEST_CASE(testModelPlot, CTestFixture) {
using TDoubleVec = std::vector<double>;
using TStrVec = std::vector<std::string>;
using TMockModelPtr = std::unique_ptr<model::CMockModel>;

core_t::TTime bucketLength{600};
model::CSearchKey key;
model::SModelParams params{bucketLength};
constexpr core_t::TTime bucketLength{600};
const model::CSearchKey key;
const model::SModelParams params{bucketLength};
model_t::TFeatureVec features;

model::CAnomalyDetectorModel::TDataGathererPtr gatherer;
TMockModelPtr model;

auto setupTest = [&]() {
gatherer = std::make_shared<model::CDataGatherer>(
model_t::analysisCategory(features[0]), model_t::E_None, params,
EMPTY_STRING, EMPTY_STRING, "p", EMPTY_STRING, EMPTY_STRING,
TStrVec{}, key, features, 0, 0);
std::string person11{"p11"};
std::string person12{"p12"};
std::string person21{"p21"};
std::string person22{"p22"};
bool addedPerson{false};
gatherer->addPerson(person11, m_ResourceMonitor, addedPerson);
gatherer->addPerson(person12, m_ResourceMonitor, addedPerson);
gatherer->addPerson(person21, m_ResourceMonitor, addedPerson);
gatherer->addPerson(person22, m_ResourceMonitor, addedPerson);

model.reset(new model::CMockModel{params, gatherer, {/*we don't care about influence*/}});

maths::time_series::CTimeSeriesDecomposition trend;
maths::common::CNormalMeanPrecConjugate prior{
gatherer = model::CDataGathererBuilder(model_t::analysisCategory(features[0]),
features, params, key, 0)
.personFieldName("p")
.buildSharedPtr();
for (const std::vector<std::string> persons{"p11", "p12", "p21", "p22"};
const auto& person : persons) {
bool addedPerson{false};
gatherer->addPerson(person, m_ResourceMonitor, addedPerson);
}

const model::CMockModel::TFeatureInfluenceCalculatorCPtrPrVecVec influenceCalculators;
model = std::make_unique<model::CMockModel>(params, gatherer, influenceCalculators);

const maths::time_series::CTimeSeriesDecomposition trend;
const maths::common::CNormalMeanPrecConjugate prior{
maths::common::CNormalMeanPrecConjugate::nonInformativePrior(maths_t::E_ContinuousData)};
maths::common::CModelParams timeSeriesModelParams{
const maths::common::CModelParams timeSeriesModelParams{
bucketLength, 1.0, 0.001, 0.2, 6 * core::constants::HOUR, 24 * core::constants::HOUR};
maths::time_series::CUnivariateTimeSeriesModel timeSeriesModel{
const maths::time_series::CUnivariateTimeSeriesModel timeSeriesModel{
timeSeriesModelParams, 0, trend, prior};
model::CMockModel::TMathsModelUPtrVec models;
models.emplace_back(timeSeriesModel.clone(0));
models.emplace_back(timeSeriesModel.clone(1));
models.emplace_back(timeSeriesModel.clone(2));
models.emplace_back(timeSeriesModel.clone(3));
for (int i = 0; i < 4; ++i) {
models.emplace_back(timeSeriesModel.clone(i));
}
model->mockTimeSeriesModels(std::move(models));
};

LOG_DEBUG(<< "Individual sum");
{
features.assign(1, model_t::E_IndividualSumByBucketAndPerson);
auto testModelPlot = [&](model_t::EFeature feature, const TDoubleVec& values) {
features.assign(1, feature);
setupTest();

TDoubleVec values{2.0, 3.0, 0.0, 0.0};
std::size_t pid{0};
for (auto value : values) {
model->mockAddBucketValue(model_t::E_IndividualSumByBucketAndPerson,
pid++, 0, 0, {value});
model->mockAddBucketValue(feature, pid++, 0, 0, {value});
}

model::CModelPlotData plotData;
model->details()->modelPlot(0, 90.0, {}, plotData);
BOOST_TEST_REQUIRE(plotData.begin() != plotData.end());
for (const auto& featureByFieldData : plotData) {
BOOST_REQUIRE_EQUAL(values.size(), featureByFieldData.second.size());
for (const auto& byFieldData : featureByFieldData.second) {
BOOST_TEST_REQUIRE(gatherer->personId(byFieldData.first, pid));
BOOST_REQUIRE_EQUAL(1, byFieldData.second.s_ValuesPerOverField.size());
for (const auto& currentBucketValue : byFieldData.second.s_ValuesPerOverField) {
BOOST_REQUIRE_EQUAL(values[pid], currentBucketValue.second);
for (const auto & [ _, plotDataValues ] : plotData) {
BOOST_REQUIRE_EQUAL(values.size(), plotDataValues.size());
for (const auto & [ fst, snd ] : plotDataValues) {
BOOST_TEST_REQUIRE(gatherer->personId(fst, pid));
BOOST_REQUIRE_EQUAL(1, snd.s_ValuesPerOverField.size());
for (const auto & [ field_name, val ] : snd.s_ValuesPerOverField) {
BOOST_REQUIRE_EQUAL(values[pid], val);
}
}
}
}

LOG_DEBUG(<< "Individual count");
{
features.assign(1, model_t::E_IndividualCountByBucketAndPerson);
setupTest();
};

TDoubleVec values{0.0, 1.0, 3.0};
std::size_t pid{0};
for (auto value : values) {
model->mockAddBucketValue(model_t::E_IndividualCountByBucketAndPerson,
pid++, 0, 0, {value});
}
LOG_DEBUG(<< "Individual sum");
testModelPlot(model_t::E_IndividualSumByBucketAndPerson, {2.0, 3.0, 0.0, 0.0});

model::CModelPlotData plotData;
model->details()->modelPlot(0, 90.0, {}, plotData);
BOOST_TEST_REQUIRE(plotData.begin() != plotData.end());
for (const auto& featureByFieldData : plotData) {
BOOST_REQUIRE_EQUAL(values.size(), featureByFieldData.second.size());
for (const auto& byFieldData : featureByFieldData.second) {
BOOST_TEST_REQUIRE(gatherer->personId(byFieldData.first, pid));
BOOST_REQUIRE_EQUAL(1, byFieldData.second.s_ValuesPerOverField.size());
for (const auto& currentBucketValue : byFieldData.second.s_ValuesPerOverField) {
BOOST_REQUIRE_EQUAL(values[pid], currentBucketValue.second);
}
}
}
}
LOG_DEBUG(<< "Individual count");
testModelPlot(model_t::E_IndividualCountByBucketAndPerson, {0.0, 1.0, 3.0});
}

BOOST_AUTO_TEST_SUITE_END()
42 changes: 15 additions & 27 deletions lib/model/unittest/CRuleConditionTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@

#include <model/CAnomalyDetectorModel.h>
#include <model/CDataGatherer.h>
#include <model/CDetectionRule.h>
#include <model/CRuleCondition.h>
#include <model/CSearchKey.h>
#include <model/ModelTypes.h>
#include <model/SModelParams.h>

#include "Mocks.h"
#include "ModelTestHelpers.h"

#include <boost/test/unit_test.hpp>

Expand All @@ -31,41 +31,31 @@ BOOST_AUTO_TEST_SUITE(CRuleConditionTest)
using namespace ml;
using namespace model;

namespace {

using TStrVec = std::vector<std::string>;

const std::string EMPTY_STRING;
}

BOOST_AUTO_TEST_CASE(testTimeContition) {
core_t::TTime bucketLength = 100;
core_t::TTime startTime = 100;
CSearchKey key;
SModelParams params(bucketLength);
CAnomalyDetectorModel::TFeatureInfluenceCalculatorCPtrPrVecVec influenceCalculators;
constexpr core_t::TTime bucketLength = 100;
constexpr core_t::TTime startTime = 100;
CSearchKey const key;
SModelParams const params(bucketLength);
const CAnomalyDetectorModel::TFeatureInfluenceCalculatorCPtrPrVecVec influenceCalculators;

model_t::TFeatureVec features;
features.push_back(model_t::E_IndividualMeanByPerson);
CAnomalyDetectorModel::TDataGathererPtr gathererPtr(std::make_shared<CDataGatherer>(
model_t::E_Metric, model_t::E_None, params, EMPTY_STRING, EMPTY_STRING, EMPTY_STRING,
EMPTY_STRING, EMPTY_STRING, TStrVec{}, key, features, startTime, 0));
auto gathererPtr = CDataGathererBuilder(model_t::E_Metric, features, params, key, startTime)
.buildSharedPtr();

CMockModel model(params, gathererPtr, influenceCalculators);
CMockModel const model(params, gathererPtr, influenceCalculators);

{
CRuleCondition condition;
condition.appliesTo(CRuleCondition::E_Time);
condition.op(CRuleCondition::E_GTE);
condition.value(500);

model_t::CResultType resultType(model_t::CResultType::E_Final);
model_t::CResultType const resultType(model_t::CResultType::E_Final);
BOOST_TEST_REQUIRE(condition.test(model, model_t::E_IndividualCountByBucketAndPerson,
resultType, std::size_t(0), std::size_t(1),
core_t::TTime(450)) == false);
resultType, 0, 1, 450) == false);
BOOST_TEST_REQUIRE(condition.test(model, model_t::E_IndividualCountByBucketAndPerson,
resultType, std::size_t(0),
std::size_t(1), core_t::TTime(550)));
resultType, 0, 1, 550));
}

{
Expand All @@ -74,13 +64,11 @@ BOOST_AUTO_TEST_CASE(testTimeContition) {
condition.op(CRuleCondition::E_LT);
condition.value(600);

model_t::CResultType resultType(model_t::CResultType::E_Final);
model_t::CResultType const resultType(model_t::CResultType::E_Final);
BOOST_TEST_REQUIRE(condition.test(model, model_t::E_IndividualCountByBucketAndPerson,
resultType, std::size_t(0), std::size_t(1),
core_t::TTime(600)) == false);
resultType, 0, 1, 600) == false);
BOOST_TEST_REQUIRE(condition.test(model, model_t::E_IndividualCountByBucketAndPerson,
resultType, std::size_t(0),
std::size_t(1), core_t::TTime(599)));
resultType, 0, 1, 599));
}
}

Expand Down
90 changes: 89 additions & 1 deletion lib/model/unittest/ModelTestHelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
#include <model/CDataGatherer.h>
#include <model/CSearchKey.h>
#include <model/ModelTypes.h>
#include <model/SModelParams.h>

#include <boost/test/unit_test.hpp>

Expand Down Expand Up @@ -81,6 +80,95 @@ static void testGathererAttributes(const CDataGatherer& gatherer,
BOOST_REQUIRE_EQUAL(startTime, gatherer.currentBucketStartTime());
BOOST_REQUIRE_EQUAL(bucketLength, gatherer.bucketLength());
}

//! \brief Test helper which assembles the argument list of the
//! CDataGatherer constructor one piece at a time.
//!
//! DESCRIPTION:\n
//! The five arguments taken by the builder's constructor are mandatory.
//! Every other constructor argument starts from the default declared on
//! the corresponding member below and may be overridden with one of the
//! chainable setters, each of which returns a reference to this builder.
//! There is no setter for the summary mode or the summary count field
//! name, so those always keep their defaults.
//!
//! NOTE: the feature vector, model parameters and search key are held by
//! reference, not copied, so the objects passed to the constructor must
//! stay alive for as long as the builder is used.
//! NOTE(review): whether they must also outlive the gatherer that is
//! built depends on how CDataGatherer stores them, which is not visible
//! from this header - confirm before passing temporaries.
class CDataGathererBuilder {
public:
    using TFeatureVec = CDataGatherer::TFeatureVec;
    using TStrVec = CDataGatherer::TStrVec;

    //! Capture the mandatory gatherer arguments.
    //!
    //! \param[in] gathererType The analysis category of the gatherer.
    //! \param[in] features The features to gather (stored by reference).
    //! \param[in] params The model parameters (stored by reference).
    //! \param[in] searchKey The search key (stored by reference).
    //! \param[in] startTime The start time forwarded to the gatherer.
    CDataGathererBuilder(model_t::EAnalysisCategory gathererType,
                         const TFeatureVec& features,
                         const SModelParams& params,
                         const CSearchKey& searchKey,
                         const core_t::TTime startTime)
        : m_Features(features), m_Params(params), m_StartTime(startTime),
          m_SearchKey(searchKey), m_GathererType(gathererType) {
    }

    //! \name Optional settings
    //! Each setter overwrites one stored argument and returns this builder
    //! so that calls can be chained.
    //@{
    CDataGathererBuilder& gathererType(model_t::EAnalysisCategory gathererType) {
        m_GathererType = gathererType;
        return *this;
    }

    CDataGathererBuilder& partitionFieldValue(std::string_view partitionFieldValue) {
        m_PartitionFieldValue.assign(partitionFieldValue);
        return *this;
    }

    CDataGathererBuilder& personFieldName(std::string_view personFieldName) {
        m_PersonFieldName.assign(personFieldName);
        return *this;
    }

    CDataGathererBuilder& attributeFieldName(std::string_view attributeFieldName) {
        m_AttributeFieldName.assign(attributeFieldName);
        return *this;
    }

    CDataGathererBuilder& valueFieldName(std::string_view valueFieldName) {
        m_ValueFieldName.assign(valueFieldName);
        return *this;
    }

    CDataGathererBuilder& influenceFieldNames(const TStrVec& influenceFieldName) {
        m_InfluenceFieldNames = influenceFieldName;
        return *this;
    }

    //! The count is narrowed from std::size_t to the int that is stored
    //! and later passed to the CDataGatherer constructor.
    CDataGathererBuilder& sampleCountOverride(std::size_t sampleCount) {
        m_SampleCountOverride = static_cast<int>(sampleCount);
        return *this;
    }
    //@}

    //! Construct a gatherer by value from the arguments collected so far.
    CDataGatherer build() const {
        return CDataGatherer{m_GathererType,
                             m_SummaryMode,
                             m_Params,
                             m_SummaryCountFieldName,
                             m_PartitionFieldValue,
                             m_PersonFieldName,
                             m_AttributeFieldName,
                             m_ValueFieldName,
                             m_InfluenceFieldNames,
                             m_SearchKey,
                             m_Features,
                             m_StartTime,
                             m_SampleCountOverride};
    }

    //! As build(), but the gatherer is created with std::make_shared and
    //! returned through a shared pointer.
    std::shared_ptr<CDataGatherer> buildSharedPtr() const {
        return std::make_shared<CDataGatherer>(
            m_GathererType, m_SummaryMode, m_Params,
            m_SummaryCountFieldName, m_PartitionFieldValue,
            m_PersonFieldName, m_AttributeFieldName, m_ValueFieldName,
            m_InfluenceFieldNames, m_SearchKey, m_Features, m_StartTime,
            m_SampleCountOverride);
    }

private:
    // Mandatory arguments. The three reference members alias the caller's
    // objects; see the lifetime note in the class description.
    const TFeatureVec& m_Features;
    const SModelParams& m_Params;
    core_t::TTime m_StartTime;
    const CSearchKey& m_SearchKey;
    model_t::EAnalysisCategory m_GathererType;

    // Optional arguments and their defaults.
    model_t::ESummaryMode m_SummaryMode{model_t::E_None};
    std::string m_SummaryCountFieldName{EMPTY_STRING};
    std::string m_PartitionFieldValue{EMPTY_STRING};
    std::string m_PersonFieldName{EMPTY_STRING};
    std::string m_AttributeFieldName{EMPTY_STRING};
    std::string m_ValueFieldName{EMPTY_STRING};
    TStrVec m_InfluenceFieldNames;
    int m_SampleCountOverride{0};
};
}
}

Expand Down