From a7b1ce2b2320b4c2eca98db06ae2dc4503931490 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 4 Apr 2025 15:39:45 +1300 Subject: [PATCH 01/49] [ML] Report the "actual" memory usage of the autodetect process Determine the actual memory usgae of the autodetect process as reported by the OS, e.g. on Linux this mould be the value of the maximum resident set size returned by a call to `getrusage`. Add this value to the model size stats record returned to the ES Java process so it can be included in the `job counts` tab for anomaly detection jobs. --- bin/autodetect/Main.cc | 6 +- include/core/CProgramCounters.h | 7 ++- include/model/CResourceMonitor.h | 3 + include/model/ModelTypes.h | 4 +- lib/api/CModelSizeStatsJsonWriter.cc | 4 ++ lib/api/unittest/CAnomalyJobLimitTest.cc | 31 +++++++++- lib/api/unittest/CJsonOutputWriterTest.cc | 59 ++++++++++--------- .../unittest/CModelSnapshotJsonWriterTest.cc | 1 + lib/core/CProcessStats_Linux.cc | 7 ++- lib/core/CProcessStats_MacOSX.cc | 9 ++- lib/core/CProcessStats_Windows.cc | 12 +++- lib/model/CResourceMonitor.cc | 6 ++ lib/model/ModelTypes.cc | 2 + lib/model/unittest/CResourceMonitorTest.cc | 14 ++++- 14 files changed, 126 insertions(+), 39 deletions(-) diff --git a/bin/autodetect/Main.cc b/bin/autodetect/Main.cc index 904920e3db..bbb90c706a 100644 --- a/bin/autodetect/Main.cc +++ b/bin/autodetect/Main.cc @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -83,7 +84,8 @@ int main(int argc, char** argv) { ml::counter_t::E_TSADNumberMemoryLimitModelCreationFailures, ml::counter_t::E_TSADNumberPrunedItems, ml::counter_t::E_TSADAssignmentMemoryBasis, - ml::counter_t::E_TSADOutputMemoryAllocatorUsage}; + ml::counter_t::E_TSADOutputMemoryAllocatorUsage, + ml::counter_t::E_TSADMaxResidentSetSize}; ml::core::CProgramCounters::registerProgramCounterTypes(counters); @@ -151,6 +153,8 @@ int main(int argc, char** argv) { } cancellerThread.stop(); + LOG_DEBUG(<< "Max Resident Set Size: " << ml::core::CProcessStats::maxResidentSetSize()); + // Log the program version immediately after reconfiguring the logger. This // must be done from the program, and NOT a shared library, as each program // statically links its own version library. diff --git a/include/core/CProgramCounters.h b/include/core/CProgramCounters.h index 3c4d10269f..34d5cdbb26 100644 --- a/include/core/CProgramCounters.h +++ b/include/core/CProgramCounters.h @@ -112,6 +112,9 @@ enum ECounterTypes { //! The memory currently used by the allocators to output JSON documents, in bytes. E_TSADOutputMemoryAllocatorUsage = 30, + //! The maximum resident set size of the process, in bytes. + E_TSADMaxResidentSetSize = 31, + // Data Frame Outlier Detection //! The estimated peak memory usage for outlier detection in bytes @@ -146,7 +149,7 @@ enum ECounterTypes { // Add any new values here //! This MUST be last, increment the value for every new enum added - E_LastEnumCounter = 31 + E_LastEnumCounter = 32 }; static constexpr std::size_t NUM_COUNTERS = static_cast(E_LastEnumCounter); @@ -355,6 +358,8 @@ class CORE_EXPORT CProgramCounters { "Which option is being used to get model memory for node assignment?"}, {counter_t::E_TSADOutputMemoryAllocatorUsage, "E_TSADOutputMemoryAllocatorUsage", "The amount of memory used to output JSON documents, in bytes."}, + {counter_t::E_TSADMaxResidentSetSize, "E_TSADMaxResidentSetSize", + "The maximum resident set size of the process, in bytes"}, {counter_t::E_DFOEstimatedPeakMemoryUsage, "E_DFOEstimatedPeakMemoryUsage", "The upfront estimate of the peak memory outlier detection would use"}, {counter_t::E_DFOPeakMemoryUsage, "E_DFOPeakMemoryUsage", "The peak memory outlier detection used"}, diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index 5c7583888b..c9c887281f 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -54,6 +54,7 @@ class MODEL_EXPORT CResourceMonitor { std::size_t s_AdjustedUsage{0}; std::size_t s_PeakUsage{0}; std::size_t s_AdjustedPeakUsage{0}; + std::size_t s_ActualMemoryUsage{0}; std::size_t s_ByFields{0}; std::size_t s_PartitionFields{0}; std::size_t s_OverFields{0}; @@ -180,6 +181,8 @@ class MODEL_EXPORT CResourceMonitor { //! Returns the sum of used memory plus any extra memory std::size_t totalMemory() const; + std::size_t actualMemoryUsage() const; + private: using TMonitoredResourcePtrSizeUMap = boost::unordered_map; diff --git a/include/model/ModelTypes.h b/include/model/ModelTypes.h index acbcc14c04..aeffe27e83 100644 --- a/include/model/ModelTypes.h +++ b/include/model/ModelTypes.h @@ -719,7 +719,9 @@ enum EAssignmentMemoryBasis { E_AssignmentBasisUnknown = 0, //!< Decision made in Java code E_AssignmentBasisModelMemoryLimit = 1, //!< Use model memory limit E_AssignmentBasisCurrentModelBytes = 2, //!< Use current actual model size - E_AssignmentBasisPeakModelBytes = 3 //!< Use highest ever actual model size + E_AssignmentBasisPeakModelBytes = 3, //!< Use highest ever actual model size + E_AssignmentBasisActualMemoryUsageBytes = 4 //!< Use the actual memory size + //!< of the process, as reported by the OS }; //! Get a string description of \p assignmentMemoryBasis. diff --git a/lib/api/CModelSizeStatsJsonWriter.cc b/lib/api/CModelSizeStatsJsonWriter.cc index 43fef49602..75604c7f6a 100644 --- a/lib/api/CModelSizeStatsJsonWriter.cc +++ b/lib/api/CModelSizeStatsJsonWriter.cc @@ -25,6 +25,7 @@ const std::string JOB_ID{"job_id"}; const std::string MODEL_SIZE_STATS{"model_size_stats"}; const std::string MODEL_BYTES{"model_bytes"}; const std::string PEAK_MODEL_BYTES{"peak_model_bytes"}; +const std::string ACTUAL_MEMORY_USAGE_BYTES{"actual_memory_usage_bytes"}; const std::string MODEL_BYTES_EXCEEDED{"model_bytes_exceeded"}; const std::string MODEL_BYTES_MEMORY_LIMIT{"model_bytes_memory_limit"}; const std::string TOTAL_BY_FIELD_COUNT{"total_by_field_count"}; @@ -60,6 +61,9 @@ void CModelSizeStatsJsonWriter::write(const std::string& jobId, writer.onKey(PEAK_MODEL_BYTES); writer.onUint64(results.s_AdjustedPeakUsage); + writer.onKey(ACTUAL_MEMORY_USAGE_BYTES); + writer.onUint64(results.s_ActualMemoryUsage); + writer.onKey(MODEL_BYTES_EXCEEDED); writer.onUint64(results.s_BytesExceeded); diff --git a/lib/api/unittest/CAnomalyJobLimitTest.cc b/lib/api/unittest/CAnomalyJobLimitTest.cc index b003e90a53..938892589c 100644 --- a/lib/api/unittest/CAnomalyJobLimitTest.cc +++ b/lib/api/unittest/CAnomalyJobLimitTest.cc @@ -9,6 +9,7 @@ * limitation. */ #include +#include #include #include @@ -92,6 +93,10 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { std::size_t nonLimitedUsage{0}; std::size_t limitedUsage{0}; + std::size_t actualUsage{0}; + std::size_t baseline{0}; + std::size_t nonLimitedAdjustedActualUsage{0}; + std::size_t limitedAdjustedActualUsage{0}; { // Without limits, this data set should make the models around // 1230000 bytes @@ -105,6 +110,8 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); model::CLimits limits; + baseline = limits.resourceMonitor().actualMemoryUsage(); + //limits.resourceMonitor().m_ByteLimitHigh = 100000; //limits.resourceMonitor().m_ByteLimitLow = 90000; @@ -127,8 +134,15 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { BOOST_REQUIRE_EQUAL(uint64_t(18630), job.numRecordsHandled()); nonLimitedUsage = limits.resourceMonitor().totalMemory(); + actualUsage = limits.resourceMonitor().actualMemoryUsage(); + nonLimitedAdjustedActualUsage = actualUsage - baseline; } } + LOG_DEBUG(<< "nonLimitedUsage: " << nonLimitedUsage); + LOG_DEBUG(<< "baseline: " << baseline); + LOG_DEBUG(<< "actualUsage: " << actualUsage); + LOG_DEBUG(<< "nonLimitedAdjustedActualUsage: " << nonLimitedAdjustedActualUsage); + BOOST_TEST_REQUIRE(nonLimitedAdjustedActualUsage >= nonLimitedUsage); { // Now run the data with limiting ml::api::CAnomalyJobConfig jobConfig = CTestAnomalyJob::makeSimpleJobConfig( @@ -138,6 +152,8 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { model::CAnomalyDetectorModelConfig::defaultConfig(3600); model::CLimits limits; + baseline = limits.resourceMonitor().actualMemoryUsage(); + std::stringstream outputStrm; { core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); @@ -166,11 +182,18 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { // TODO this limit must be tightened once there is more granular // control over the model memory creation limitedUsage = limits.resourceMonitor().totalMemory(); + actualUsage = limits.resourceMonitor().actualMemoryUsage(); + limitedAdjustedActualUsage = actualUsage - baseline; } LOG_TRACE(<< outputStrm.str()); LOG_DEBUG(<< "Non-limited usage: " << nonLimitedUsage << "; limited: " << limitedUsage); + LOG_DEBUG(<< "baseline: " << baseline); + LOG_DEBUG(<< "actualUsage: " << actualUsage); + LOG_DEBUG(<< "limitedAdjustedActualUsage: " << limitedAdjustedActualUsage); BOOST_TEST_REQUIRE(limitedUsage < nonLimitedUsage); + BOOST_TEST_REQUIRE(limitedAdjustedActualUsage < nonLimitedAdjustedActualUsage); + BOOST_TEST_REQUIRE(limitedAdjustedActualUsage >= limitedUsage); } } @@ -375,6 +398,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { LOG_DEBUG(<< "# partition = " << used.s_PartitionFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage bytes = " << used.s_Usage); + LOG_DEBUG(<< "Actual memory usage bytes = " << used.s_ActualMemoryUsage); LOG_DEBUG(<< "Memory limit bytes = " << memoryLimit * core::constants::BYTES_IN_MEGABYTES); BOOST_TEST_REQUIRE(used.s_ByFields > testParam.s_ExpectedByFields); @@ -384,6 +408,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { memoryLimit * core::constants::BYTES_IN_MEGABYTES / 2, used.s_Usage, memoryLimit * core::constants::BYTES_IN_MEGABYTES / testParam.s_ExpectedByMemoryUsageRelativeErrorDivisor); + BOOST_TEST_REQUIRE(used.s_Usage <= used.s_ActualMemoryUsage); } LOG_DEBUG(<< "**** Test partition with bucketLength = " << testParam.s_BucketLength @@ -423,11 +448,12 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { } core_t::TTime startOfBucket{ maths::common::CIntegerTools::floor(time, testParam.s_BucketLength)}; - auto used = limits.resourceMonitor().createMemoryUsageReport(startOfBucket); + auto used = limits.resourceMonitor(). createMemoryUsageReport(startOfBucket); LOG_DEBUG(<< "# by = " << used.s_ByFields); LOG_DEBUG(<< "# partition = " << used.s_PartitionFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage = " << used.s_Usage); + LOG_DEBUG(<< "Actual memory usage = " << used.s_ActualMemoryUsage); LOG_DEBUG(<< "Memory limit bytes = " << memoryLimit * 1024 * 1024); BOOST_TEST_REQUIRE(used.s_PartitionFields >= testParam.s_ExpectedPartitionFields); BOOST_TEST_REQUIRE(used.s_PartitionFields < 450); @@ -437,6 +463,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { memoryLimit * core::constants::BYTES_IN_MEGABYTES / 2, used.s_Usage, memoryLimit * core::constants::BYTES_IN_MEGABYTES / testParam.s_ExpectedPartitionUsageRelativeErrorDivisor); + BOOST_TEST_REQUIRE(used.s_Usage <= used.s_ActualMemoryUsage); } LOG_DEBUG(<< "**** Test over with bucketLength = " << testParam.s_BucketLength @@ -479,6 +506,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { LOG_DEBUG(<< "# over = " << used.s_OverFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage = " << used.s_Usage); + LOG_DEBUG(<< "Actual memory usage = " << used.s_ActualMemoryUsage); LOG_DEBUG(<< "Memory limit bytes = " << memoryLimit * 1024 * 1024); BOOST_TEST_REQUIRE(used.s_OverFields > testParam.s_ExpectedOverFields); BOOST_TEST_REQUIRE(used.s_OverFields <= 9000); @@ -486,6 +514,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { memoryLimit * core::constants::BYTES_IN_MEGABYTES / 2, used.s_Usage, memoryLimit * core::constants::BYTES_IN_MEGABYTES / testParam.s_ExpectedOverUsageRelativeErrorDivisor); + BOOST_TEST_REQUIRE(used.s_Usage <= used.s_ActualMemoryUsage); } } } diff --git a/lib/api/unittest/CJsonOutputWriterTest.cc b/lib/api/unittest/CJsonOutputWriterTest.cc index ba44163e7c..b4b9db851a 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.cc +++ b/lib/api/unittest/CJsonOutputWriterTest.cc @@ -1728,21 +1728,22 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { resourceUsage.s_AdjustedUsage = 2; resourceUsage.s_PeakUsage = 3; resourceUsage.s_AdjustedPeakUsage = 4; - resourceUsage.s_ByFields = 5; - resourceUsage.s_PartitionFields = 6; - resourceUsage.s_OverFields = 7; - resourceUsage.s_AllocationFailures = 8; + resourceUsage.s_ActualMemoryUsage = 5; + resourceUsage.s_ByFields = 6; + resourceUsage.s_PartitionFields = 7; + resourceUsage.s_OverFields = 8; + resourceUsage.s_AllocationFailures = 9; resourceUsage.s_MemoryStatus = ml::model_t::E_MemoryStatusHardLimit; - resourceUsage.s_AssignmentMemoryBasis = ml::model_t::E_AssignmentBasisCurrentModelBytes; - resourceUsage.s_BucketStartTime = 9; - resourceUsage.s_BytesExceeded = 10; - resourceUsage.s_BytesMemoryLimit = 11; - resourceUsage.s_OverallCategorizerStats.s_CategorizedMessages = 12; - resourceUsage.s_OverallCategorizerStats.s_TotalCategories = 13; - resourceUsage.s_OverallCategorizerStats.s_FrequentCategories = 14; - resourceUsage.s_OverallCategorizerStats.s_RareCategories = 15; - resourceUsage.s_OverallCategorizerStats.s_DeadCategories = 16; - resourceUsage.s_OverallCategorizerStats.s_MemoryCategorizationFailures = 17; + resourceUsage.s_AssignmentMemoryBasis = ml::model_t::E_AssignmentBasisActualMemoryUsageBytes; + resourceUsage.s_BucketStartTime = 10; + resourceUsage.s_BytesExceeded = 11; + resourceUsage.s_BytesMemoryLimit = 12; + resourceUsage.s_OverallCategorizerStats.s_CategorizedMessages = 13; + resourceUsage.s_OverallCategorizerStats.s_TotalCategories = 14; + resourceUsage.s_OverallCategorizerStats.s_FrequentCategories = 15; + resourceUsage.s_OverallCategorizerStats.s_RareCategories = 16; + resourceUsage.s_OverallCategorizerStats.s_DeadCategories = 17; + resourceUsage.s_OverallCategorizerStats.s_MemoryCategorizationFailures = 18; resourceUsage.s_OverallCategorizerStats.s_CategorizationStatus = ml::model_t::E_CategorizationStatusWarn; @@ -1770,44 +1771,46 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { BOOST_REQUIRE_EQUAL(2, sizeStats.at("model_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("peak_model_bytes")); BOOST_REQUIRE_EQUAL(4, sizeStats.at("peak_model_bytes").to_number()); + BOOST_TEST_REQUIRE(sizeStats.contains("actual_memory_usage_bytes")); + BOOST_REQUIRE_EQUAL(5, sizeStats.at("actual_memory_usage_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_by_field_count")); - BOOST_REQUIRE_EQUAL(5, sizeStats.at("total_by_field_count").to_number()); + BOOST_REQUIRE_EQUAL(6, sizeStats.at("total_by_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_partition_field_count")); BOOST_REQUIRE_EQUAL( - 6, sizeStats.at("total_partition_field_count").to_number()); + 7, sizeStats.at("total_partition_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_over_field_count")); - BOOST_REQUIRE_EQUAL(7, sizeStats.at("total_over_field_count").to_number()); + BOOST_REQUIRE_EQUAL(8, sizeStats.at("total_over_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("bucket_allocation_failures_count")); BOOST_REQUIRE_EQUAL( - 8, sizeStats.at("bucket_allocation_failures_count").to_number()); + 9, sizeStats.at("bucket_allocation_failures_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("timestamp")); - BOOST_REQUIRE_EQUAL(9000, sizeStats.at("timestamp").to_number()); + BOOST_REQUIRE_EQUAL(10000, sizeStats.at("timestamp").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("memory_status")); BOOST_REQUIRE_EQUAL("hard_limit", sizeStats.at("memory_status").as_string()); BOOST_TEST_REQUIRE(sizeStats.contains("assignment_memory_basis")); - BOOST_REQUIRE_EQUAL("current_model_bytes", + BOOST_REQUIRE_EQUAL("actual_memory_usage_bytes", sizeStats.at("assignment_memory_basis").as_string()); BOOST_TEST_REQUIRE(sizeStats.contains("log_time")); std::int64_t nowMs{ml::core::CTimeUtils::nowMs()}; BOOST_TEST_REQUIRE(nowMs >= sizeStats.at("log_time").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("model_bytes_exceeded")); - BOOST_REQUIRE_EQUAL(10, sizeStats.at("model_bytes_exceeded").to_number()); + BOOST_REQUIRE_EQUAL(11, sizeStats.at("model_bytes_exceeded").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("model_bytes_memory_limit")); BOOST_REQUIRE_EQUAL( - 11, sizeStats.at("model_bytes_memory_limit").to_number()); + 12, sizeStats.at("model_bytes_memory_limit").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("categorized_doc_count")); - BOOST_REQUIRE_EQUAL(12, sizeStats.at("categorized_doc_count").to_number()); + BOOST_REQUIRE_EQUAL(13, sizeStats.at("categorized_doc_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_category_count")); - BOOST_REQUIRE_EQUAL(13, sizeStats.at("total_category_count").to_number()); + BOOST_REQUIRE_EQUAL(14, sizeStats.at("total_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("frequent_category_count")); BOOST_REQUIRE_EQUAL( - 14, sizeStats.at("frequent_category_count").to_number()); + 15, sizeStats.at("frequent_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("rare_category_count")); - BOOST_REQUIRE_EQUAL(15, sizeStats.at("rare_category_count").to_number()); + BOOST_REQUIRE_EQUAL(16, sizeStats.at("rare_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("dead_category_count")); - BOOST_REQUIRE_EQUAL(16, sizeStats.at("dead_category_count").to_number()); + BOOST_REQUIRE_EQUAL(17, sizeStats.at("dead_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("failed_category_count")); - BOOST_REQUIRE_EQUAL(17, sizeStats.at("failed_category_count").to_number()); + BOOST_REQUIRE_EQUAL(18, sizeStats.at("failed_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("categorization_status")); BOOST_REQUIRE_EQUAL("warn", sizeStats.at("categorization_status").as_string()); } diff --git a/lib/api/unittest/CModelSnapshotJsonWriterTest.cc b/lib/api/unittest/CModelSnapshotJsonWriterTest.cc index ff404b7f91..eb4d382b9a 100644 --- a/lib/api/unittest/CModelSnapshotJsonWriterTest.cc +++ b/lib/api/unittest/CModelSnapshotJsonWriterTest.cc @@ -36,6 +36,7 @@ BOOST_AUTO_TEST_CASE(testWrite) { 20000, // bytes used (adjusted) 30000, // peak bytes used 60000, // peak bytes used (adjusted) + 409600, // Actual memory used (max rss) 3, // # by fields 1, // # partition fields 150, // # over fields diff --git a/lib/core/CProcessStats_Linux.cc b/lib/core/CProcessStats_Linux.cc index e5ab8cdfd0..b0c02425e9 100644 --- a/lib/core/CProcessStats_Linux.cc +++ b/lib/core/CProcessStats_Linux.cc @@ -11,6 +11,7 @@ #include #include +#include #include #include @@ -87,7 +88,11 @@ std::size_t CProcessStats::maxResidentSetSize() { } // ru_maxrss is in kilobytes - return static_cast(rusage.ru_maxrss * 1024L); + std::size_t maxRSS = static_cast(rusage.ru_maxrss * 1024L); + + CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = maxRSS; + + return maxRSS; } } } diff --git a/lib/core/CProcessStats_MacOSX.cc b/lib/core/CProcessStats_MacOSX.cc index 9aa1e969c9..f1c55e2aae 100644 --- a/lib/core/CProcessStats_MacOSX.cc +++ b/lib/core/CProcessStats_MacOSX.cc @@ -8,9 +8,11 @@ * compliance with the Elastic License 2.0 and the foregoing additional * limitation. */ -#include #include +#include +#include + #include #include #include @@ -31,9 +33,10 @@ std::size_t CProcessStats::maxResidentSetSize() { LOG_DEBUG(<< "failed to get resource usage(getrusage): " << ::strerror(errno)); return 0; } - + std::size_t maxRSS = static_cast(rusage.ru_maxrss); + CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = maxRSS; // ru_maxrss is in bytes - return static_cast(rusage.ru_maxrss); + return maxRSS; } } } diff --git a/lib/core/CProcessStats_Windows.cc b/lib/core/CProcessStats_Windows.cc index 7ca2d7e6c0..78cb418d93 100644 --- a/lib/core/CProcessStats_Windows.cc +++ b/lib/core/CProcessStats_Windows.cc @@ -8,8 +8,10 @@ * compliance with the Elastic License 2.0 and the foregoing additional * limitation. */ -#include #include + +#include +#include #include #include @@ -36,7 +38,13 @@ std::size_t CProcessStats::maxResidentSetSize() { LOG_DEBUG(<< "Failed to retrieve memory info " << CWindowsError()); return 0; } - return static_cast(stats.PeakWorkingSetSize); + + std::size_t peakWorkingSetSize = static_cast(stats.PeakWorkingSetSize); + + + CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = peakWorkingSetSize; + + return peakWorkingSetSize; } } } diff --git a/lib/model/CResourceMonitor.cc b/lib/model/CResourceMonitor.cc index d93b3b8bd8..a0b74ed6f3 100644 --- a/lib/model/CResourceMonitor.cc +++ b/lib/model/CResourceMonitor.cc @@ -13,6 +13,7 @@ #include #include +#include #include #include @@ -382,6 +383,7 @@ CResourceMonitor::createMemoryUsageReport(core_t::TTime bucketStartTime) { res.s_PeakUsage = static_cast( core::CProgramCounters::counter(counter_t::E_TSADPeakMemoryUsage)); res.s_AdjustedPeakUsage = this->adjustedUsage(res.s_PeakUsage); + res.s_ActualMemoryUsage = core::CProcessStats::maxResidentSetSize(); res.s_BytesMemoryLimit = this->persistenceMemoryIncreaseFactor() * m_ByteLimitHigh; res.s_BytesExceeded = m_CurrentBytesExceeded; res.s_MemoryStatus = m_MemoryStatus; @@ -491,5 +493,9 @@ std::size_t CResourceMonitor::totalMemory() const { counter_t::E_TSADOutputMemoryAllocatorUsage)); } +std::size_t CResourceMonitor::actualMemoryUsage() const { + return core::CProcessStats::maxResidentSetSize(); +} + } // model } // ml diff --git a/lib/model/ModelTypes.cc b/lib/model/ModelTypes.cc index 2fab1d1c2a..a9140a66b3 100644 --- a/lib/model/ModelTypes.cc +++ b/lib/model/ModelTypes.cc @@ -1733,6 +1733,8 @@ std::string print(EAssignmentMemoryBasis assignmentMemoryBasis) { return "current_model_bytes"; case E_AssignmentBasisPeakModelBytes: return "peak_model_bytes"; + case E_AssignmentBasisActualMemoryUsageBytes: + return "actual_memory_usage_bytes"; } return "-"; } diff --git a/lib/model/unittest/CResourceMonitorTest.cc b/lib/model/unittest/CResourceMonitorTest.cc index f69dccc384..f8fe8fcc89 100644 --- a/lib/model/unittest/CResourceMonitorTest.cc +++ b/lib/model/unittest/CResourceMonitorTest.cc @@ -536,7 +536,7 @@ BOOST_FIXTURE_TEST_CASE(testExtraMemory, CTestFixture) { } BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { - // Clear the counter so that other test cases do not interfere. + // Clear the counters so that other test cases do not interfere. core::CProgramCounters::counter(counter_t::E_TSADPeakMemoryUsage) = 0; CLimits limits; @@ -549,6 +549,9 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory, m_ReportedModelSizeStats.s_PeakUsage); + BOOST_TEST_REQUIRE(baseTotalMemory <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + monitor.addExtraMemory(100); monitor.updateMoments(monitor.totalMemory(), 0, 1); @@ -556,6 +559,9 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_PeakUsage); + BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + monitor.addExtraMemory(-50); monitor.updateMoments(monitor.totalMemory(), 0, 1); @@ -563,12 +569,18 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 50, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_PeakUsage); + BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + monitor.addExtraMemory(100); monitor.updateMoments(monitor.totalMemory(), 0, 1); monitor.sendMemoryUsageReport(0, 1); BOOST_REQUIRE_EQUAL(baseTotalMemory + 150, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory + 150, m_ReportedModelSizeStats.s_PeakUsage); + + BOOST_TEST_REQUIRE(baseTotalMemory + 150 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= m_ReportedModelSizeStats.s_ActualMemoryUsage); } BOOST_FIXTURE_TEST_CASE(testUpdateMoments, CTestFixture) { From cb957cacfaa15484196a48c42467a8a5ab7c8040 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 4 Apr 2025 15:53:50 +1300 Subject: [PATCH 02/49] Update changelog --- docs/CHANGELOG.asciidoc | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index 75117ca066..0b118eb82b 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -33,6 +33,7 @@ === Enhancements * Track memory used in the hierarchical results normalizer. (See {ml-pull}2831[#2831].) +* Report the actual memory usage of the autodetect process. (See {ml-pull}2846[#2846]) === Bug Fixes From 8f73f02d7486068b1d43f4a337b0593fcdd1ef12 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 4 Apr 2025 16:28:53 +1300 Subject: [PATCH 03/49] Formatting --- include/model/ModelTypes.h | 2 +- lib/api/unittest/CAnomalyJobLimitTest.cc | 2 +- lib/api/unittest/CJsonOutputWriterTest.cc | 3 ++- lib/core/CProcessStats_Windows.cc | 1 - lib/model/unittest/CResourceMonitorTest.cc | 12 ++++++++---- 5 files changed, 12 insertions(+), 8 deletions(-) diff --git a/include/model/ModelTypes.h b/include/model/ModelTypes.h index aeffe27e83..ef09536551 100644 --- a/include/model/ModelTypes.h +++ b/include/model/ModelTypes.h @@ -721,7 +721,7 @@ enum EAssignmentMemoryBasis { E_AssignmentBasisCurrentModelBytes = 2, //!< Use current actual model size E_AssignmentBasisPeakModelBytes = 3, //!< Use highest ever actual model size E_AssignmentBasisActualMemoryUsageBytes = 4 //!< Use the actual memory size - //!< of the process, as reported by the OS + //!< of the process, as reported by the OS }; //! Get a string description of \p assignmentMemoryBasis. diff --git a/lib/api/unittest/CAnomalyJobLimitTest.cc b/lib/api/unittest/CAnomalyJobLimitTest.cc index 938892589c..f1531f46ec 100644 --- a/lib/api/unittest/CAnomalyJobLimitTest.cc +++ b/lib/api/unittest/CAnomalyJobLimitTest.cc @@ -448,7 +448,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { } core_t::TTime startOfBucket{ maths::common::CIntegerTools::floor(time, testParam.s_BucketLength)}; - auto used = limits.resourceMonitor(). createMemoryUsageReport(startOfBucket); + auto used = limits.resourceMonitor().createMemoryUsageReport(startOfBucket); LOG_DEBUG(<< "# by = " << used.s_ByFields); LOG_DEBUG(<< "# partition = " << used.s_PartitionFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); diff --git a/lib/api/unittest/CJsonOutputWriterTest.cc b/lib/api/unittest/CJsonOutputWriterTest.cc index b4b9db851a..95c5e319b8 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.cc +++ b/lib/api/unittest/CJsonOutputWriterTest.cc @@ -1772,7 +1772,8 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { BOOST_TEST_REQUIRE(sizeStats.contains("peak_model_bytes")); BOOST_REQUIRE_EQUAL(4, sizeStats.at("peak_model_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("actual_memory_usage_bytes")); - BOOST_REQUIRE_EQUAL(5, sizeStats.at("actual_memory_usage_bytes").to_number()); + BOOST_REQUIRE_EQUAL( + 5, sizeStats.at("actual_memory_usage_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_by_field_count")); BOOST_REQUIRE_EQUAL(6, sizeStats.at("total_by_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_partition_field_count")); diff --git a/lib/core/CProcessStats_Windows.cc b/lib/core/CProcessStats_Windows.cc index 78cb418d93..d6e6e4649c 100644 --- a/lib/core/CProcessStats_Windows.cc +++ b/lib/core/CProcessStats_Windows.cc @@ -41,7 +41,6 @@ std::size_t CProcessStats::maxResidentSetSize() { std::size_t peakWorkingSetSize = static_cast(stats.PeakWorkingSetSize); - CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = peakWorkingSetSize; return peakWorkingSetSize; diff --git a/lib/model/unittest/CResourceMonitorTest.cc b/lib/model/unittest/CResourceMonitorTest.cc index f8fe8fcc89..193746addc 100644 --- a/lib/model/unittest/CResourceMonitorTest.cc +++ b/lib/model/unittest/CResourceMonitorTest.cc @@ -550,7 +550,8 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory, m_ReportedModelSizeStats.s_PeakUsage); BOOST_TEST_REQUIRE(baseTotalMemory <= m_ReportedModelSizeStats.s_ActualMemoryUsage); - BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= + m_ReportedModelSizeStats.s_ActualMemoryUsage); monitor.addExtraMemory(100); @@ -560,7 +561,8 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_PeakUsage); BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); - BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= + m_ReportedModelSizeStats.s_ActualMemoryUsage); monitor.addExtraMemory(-50); @@ -570,7 +572,8 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_PeakUsage); BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); - BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= + m_ReportedModelSizeStats.s_ActualMemoryUsage); monitor.addExtraMemory(100); @@ -580,7 +583,8 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 150, m_ReportedModelSizeStats.s_PeakUsage); BOOST_TEST_REQUIRE(baseTotalMemory + 150 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); - BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= + m_ReportedModelSizeStats.s_ActualMemoryUsage); } BOOST_FIXTURE_TEST_CASE(testUpdateMoments, CTestFixture) { From d3a39aedc57c7de332d81e3ce8797df5b142e409 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Mon, 7 Apr 2025 15:43:50 +1200 Subject: [PATCH 04/49] Appease SonarQube --- lib/core/CProcessStats_Linux.cc | 2 +- lib/core/CProcessStats_MacOSX.cc | 2 +- lib/core/CProcessStats_Windows.cc | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/core/CProcessStats_Linux.cc b/lib/core/CProcessStats_Linux.cc index b0c02425e9..c858b4e589 100644 --- a/lib/core/CProcessStats_Linux.cc +++ b/lib/core/CProcessStats_Linux.cc @@ -88,7 +88,7 @@ std::size_t CProcessStats::maxResidentSetSize() { } // ru_maxrss is in kilobytes - std::size_t maxRSS = static_cast(rusage.ru_maxrss * 1024L); + auto maxRSS = static_cast(rusage.ru_maxrss * 1024L); CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = maxRSS; diff --git a/lib/core/CProcessStats_MacOSX.cc b/lib/core/CProcessStats_MacOSX.cc index f1c55e2aae..c3edc85424 100644 --- a/lib/core/CProcessStats_MacOSX.cc +++ b/lib/core/CProcessStats_MacOSX.cc @@ -33,7 +33,7 @@ std::size_t CProcessStats::maxResidentSetSize() { LOG_DEBUG(<< "failed to get resource usage(getrusage): " << ::strerror(errno)); return 0; } - std::size_t maxRSS = static_cast(rusage.ru_maxrss); + auto maxRSS = static_cast(rusage.ru_maxrss); CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = maxRSS; // ru_maxrss is in bytes return maxRSS; diff --git a/lib/core/CProcessStats_Windows.cc b/lib/core/CProcessStats_Windows.cc index d6e6e4649c..d91db5e3c9 100644 --- a/lib/core/CProcessStats_Windows.cc +++ b/lib/core/CProcessStats_Windows.cc @@ -39,7 +39,7 @@ std::size_t CProcessStats::maxResidentSetSize() { return 0; } - std::size_t peakWorkingSetSize = static_cast(stats.PeakWorkingSetSize); + auto peakWorkingSetSize = static_cast(stats.PeakWorkingSetSize); CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = peakWorkingSetSize; From e2d1bf5f0e2a04bd772fa77e7ea96a8582cf6f00 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Mon, 7 Apr 2025 15:46:27 +1200 Subject: [PATCH 05/49] Tweak unit test for platform portability --- lib/api/unittest/CAnomalyJobLimitTest.cc | 32 +++++++----------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/lib/api/unittest/CAnomalyJobLimitTest.cc b/lib/api/unittest/CAnomalyJobLimitTest.cc index f1531f46ec..348b4162db 100644 --- a/lib/api/unittest/CAnomalyJobLimitTest.cc +++ b/lib/api/unittest/CAnomalyJobLimitTest.cc @@ -93,10 +93,8 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { std::size_t nonLimitedUsage{0}; std::size_t limitedUsage{0}; - std::size_t actualUsage{0}; - std::size_t baseline{0}; - std::size_t nonLimitedAdjustedActualUsage{0}; - std::size_t limitedAdjustedActualUsage{0}; + std::size_t nonLimitedActualUsage{0}; + std::size_t limitedActualUsage{0}; { // Without limits, this data set should make the models around // 1230000 bytes @@ -110,10 +108,6 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); model::CLimits limits; - baseline = limits.resourceMonitor().actualMemoryUsage(); - - //limits.resourceMonitor().m_ByteLimitHigh = 100000; - //limits.resourceMonitor().m_ByteLimitLow = 90000; { LOG_TRACE(<< "Setting up job"); @@ -134,15 +128,12 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { BOOST_REQUIRE_EQUAL(uint64_t(18630), job.numRecordsHandled()); nonLimitedUsage = limits.resourceMonitor().totalMemory(); - actualUsage = limits.resourceMonitor().actualMemoryUsage(); - nonLimitedAdjustedActualUsage = actualUsage - baseline; + nonLimitedActualUsage = limits.resourceMonitor().actualMemoryUsage(); } } LOG_DEBUG(<< "nonLimitedUsage: " << nonLimitedUsage); - LOG_DEBUG(<< "baseline: " << baseline); - LOG_DEBUG(<< "actualUsage: " << actualUsage); - LOG_DEBUG(<< "nonLimitedAdjustedActualUsage: " << nonLimitedAdjustedActualUsage); - BOOST_TEST_REQUIRE(nonLimitedAdjustedActualUsage >= nonLimitedUsage); + LOG_DEBUG(<< "nonLimitedActualUsage: " << nonLimitedActualUsage); + BOOST_TEST_REQUIRE(nonLimitedActualUsage >= nonLimitedUsage); { // Now run the data with limiting ml::api::CAnomalyJobConfig jobConfig = CTestAnomalyJob::makeSimpleJobConfig( @@ -152,8 +143,6 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { model::CAnomalyDetectorModelConfig::defaultConfig(3600); model::CLimits limits; - baseline = limits.resourceMonitor().actualMemoryUsage(); - std::stringstream outputStrm; { core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm); @@ -182,18 +171,15 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { // TODO this limit must be tightened once there is more granular // control over the model memory creation limitedUsage = limits.resourceMonitor().totalMemory(); - actualUsage = limits.resourceMonitor().actualMemoryUsage(); - limitedAdjustedActualUsage = actualUsage - baseline; + limitedActualUsage = limits.resourceMonitor().actualMemoryUsage(); } LOG_TRACE(<< outputStrm.str()); LOG_DEBUG(<< "Non-limited usage: " << nonLimitedUsage << "; limited: " << limitedUsage); - LOG_DEBUG(<< "baseline: " << baseline); - LOG_DEBUG(<< "actualUsage: " << actualUsage); - LOG_DEBUG(<< "limitedAdjustedActualUsage: " << limitedAdjustedActualUsage); + LOG_DEBUG(<< "Non-limited Actual Usage: " << nonLimitedActualUsage); + LOG_DEBUG(<< "Limited Actual Usage: " << limitedActualUsage); BOOST_TEST_REQUIRE(limitedUsage < nonLimitedUsage); - BOOST_TEST_REQUIRE(limitedAdjustedActualUsage < nonLimitedAdjustedActualUsage); - BOOST_TEST_REQUIRE(limitedAdjustedActualUsage >= limitedUsage); + BOOST_TEST_REQUIRE(limitedActualUsage >= limitedUsage); } } From 1a9a99ae5e69956976caea9d0963c70658efb25d Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Wed, 9 Apr 2025 16:01:06 +1200 Subject: [PATCH 06/49] Attend to review comments * ActualMemory -> SystemMemory * Report current resident set size as well as max --- bin/autodetect/Main.cc | 1 + include/core/CProgramCounters.h | 9 ++- include/model/CResourceMonitor.h | 9 ++- include/model/ModelTypes.h | 4 +- lib/api/CModelSizeStatsJsonWriter.cc | 10 ++- lib/api/unittest/CAnomalyJobLimitTest.cc | 33 +++++----- lib/api/unittest/CJsonOutputWriterTest.cc | 66 ++++++++++--------- .../unittest/CModelSnapshotJsonWriterTest.cc | 7 +- lib/core/CProcessStats_Linux.cc | 2 + lib/core/CProcessStats_MacOSX.cc | 1 + lib/core/CProcessStats_Windows.cc | 6 +- lib/model/CResourceMonitor.cc | 10 ++- lib/model/ModelTypes.cc | 6 +- lib/model/unittest/CResourceMonitorTest.cc | 16 ++--- 14 files changed, 110 insertions(+), 70 deletions(-) diff --git a/bin/autodetect/Main.cc b/bin/autodetect/Main.cc index bbb90c706a..11f7674634 100644 --- a/bin/autodetect/Main.cc +++ b/bin/autodetect/Main.cc @@ -85,6 +85,7 @@ int main(int argc, char** argv) { ml::counter_t::E_TSADNumberPrunedItems, ml::counter_t::E_TSADAssignmentMemoryBasis, ml::counter_t::E_TSADOutputMemoryAllocatorUsage, + ml::counter_t::E_TSADResidentSetSize, ml::counter_t::E_TSADMaxResidentSetSize}; ml::core::CProgramCounters::registerProgramCounterTypes(counters); diff --git a/include/core/CProgramCounters.h b/include/core/CProgramCounters.h index 34d5cdbb26..9bad82389a 100644 --- a/include/core/CProgramCounters.h +++ b/include/core/CProgramCounters.h @@ -112,8 +112,11 @@ enum ECounterTypes { //! The memory currently used by the allocators to output JSON documents, in bytes. E_TSADOutputMemoryAllocatorUsage = 30, + //! The resident set size of the process, in bytes. + E_TSADResidentSetSize = 31, + //! The maximum resident set size of the process, in bytes. - E_TSADMaxResidentSetSize = 31, + E_TSADMaxResidentSetSize = 32, // Data Frame Outlier Detection @@ -149,7 +152,7 @@ enum ECounterTypes { // Add any new values here //! This MUST be last, increment the value for every new enum added - E_LastEnumCounter = 32 + E_LastEnumCounter = 33 }; static constexpr std::size_t NUM_COUNTERS = static_cast(E_LastEnumCounter); @@ -358,6 +361,8 @@ class CORE_EXPORT CProgramCounters { "Which option is being used to get model memory for node assignment?"}, {counter_t::E_TSADOutputMemoryAllocatorUsage, "E_TSADOutputMemoryAllocatorUsage", "The amount of memory used to output JSON documents, in bytes."}, + {counter_t::E_TSADResidentSetSize, "E_TSADResidentSetSize", + "The resident set size of the process, in bytes"}, {counter_t::E_TSADMaxResidentSetSize, "E_TSADMaxResidentSetSize", "The maximum resident set size of the process, in bytes"}, {counter_t::E_DFOEstimatedPeakMemoryUsage, "E_DFOEstimatedPeakMemoryUsage", diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index c9c887281f..8cb9d5f3ac 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -54,7 +54,8 @@ class MODEL_EXPORT CResourceMonitor { std::size_t s_AdjustedUsage{0}; std::size_t s_PeakUsage{0}; std::size_t s_AdjustedPeakUsage{0}; - std::size_t s_ActualMemoryUsage{0}; + std::size_t s_SystemMemoryUsage{0}; + std::size_t s_MaxSystemMemoryUsage{0}; std::size_t s_ByFields{0}; std::size_t s_PartitionFields{0}; std::size_t s_OverFields{0}; @@ -181,7 +182,11 @@ class MODEL_EXPORT CResourceMonitor { //! Returns the sum of used memory plus any extra memory std::size_t totalMemory() const; - std::size_t actualMemoryUsage() const; + //! Returns the current physical memory of the process as reported by the system + std::size_t systemMemory() const; + + //! Returns the maximum physical memory of the processs as reported by the system + std::size_t maxSystemMemory() const; private: using TMonitoredResourcePtrSizeUMap = diff --git a/include/model/ModelTypes.h b/include/model/ModelTypes.h index ef09536551..66674ed39d 100644 --- a/include/model/ModelTypes.h +++ b/include/model/ModelTypes.h @@ -720,8 +720,8 @@ enum EAssignmentMemoryBasis { E_AssignmentBasisModelMemoryLimit = 1, //!< Use model memory limit E_AssignmentBasisCurrentModelBytes = 2, //!< Use current actual model size E_AssignmentBasisPeakModelBytes = 3, //!< Use highest ever actual model size - E_AssignmentBasisActualMemoryUsageBytes = 4 //!< Use the actual memory size - //!< of the process, as reported by the OS + E_AssignmentBasisSystemMemoryBytes = 4, //!< Use the current system memory size + E_AssignmentBasisMaxSystemMemoryBytes = 5 //!< Use the highest ever system memory size }; //! Get a string description of \p assignmentMemoryBasis. diff --git a/lib/api/CModelSizeStatsJsonWriter.cc b/lib/api/CModelSizeStatsJsonWriter.cc index 75604c7f6a..12858914af 100644 --- a/lib/api/CModelSizeStatsJsonWriter.cc +++ b/lib/api/CModelSizeStatsJsonWriter.cc @@ -25,7 +25,8 @@ const std::string JOB_ID{"job_id"}; const std::string MODEL_SIZE_STATS{"model_size_stats"}; const std::string MODEL_BYTES{"model_bytes"}; const std::string PEAK_MODEL_BYTES{"peak_model_bytes"}; -const std::string ACTUAL_MEMORY_USAGE_BYTES{"actual_memory_usage_bytes"}; +const std::string SYSTEM_MEMORY_BYTES{"system_memory_bytes"}; +const std::string MAX_SYSTEM_MEMORY_BYTES{"max_system_memory_bytes"}; const std::string MODEL_BYTES_EXCEEDED{"model_bytes_exceeded"}; const std::string MODEL_BYTES_MEMORY_LIMIT{"model_bytes_memory_limit"}; const std::string TOTAL_BY_FIELD_COUNT{"total_by_field_count"}; @@ -61,8 +62,11 @@ void CModelSizeStatsJsonWriter::write(const std::string& jobId, writer.onKey(PEAK_MODEL_BYTES); writer.onUint64(results.s_AdjustedPeakUsage); - writer.onKey(ACTUAL_MEMORY_USAGE_BYTES); - writer.onUint64(results.s_ActualMemoryUsage); + writer.onKey(SYSTEM_MEMORY_BYTES); + writer.onUint64(results.s_SystemMemoryUsage); + + writer.onKey(MAX_SYSTEM_MEMORY_BYTES); + writer.onUint64(results.s_MaxSystemMemoryUsage); writer.onKey(MODEL_BYTES_EXCEEDED); writer.onUint64(results.s_BytesExceeded); diff --git a/lib/api/unittest/CAnomalyJobLimitTest.cc b/lib/api/unittest/CAnomalyJobLimitTest.cc index 348b4162db..cc85baaf99 100644 --- a/lib/api/unittest/CAnomalyJobLimitTest.cc +++ b/lib/api/unittest/CAnomalyJobLimitTest.cc @@ -93,8 +93,8 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { std::size_t nonLimitedUsage{0}; std::size_t limitedUsage{0}; - std::size_t nonLimitedActualUsage{0}; - std::size_t limitedActualUsage{0}; + std::size_t nonLimitedMaxSystemUsage{0}; + std::size_t limitedMaxSystemUsage{0}; { // Without limits, this data set should make the models around // 1230000 bytes @@ -128,12 +128,12 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { BOOST_REQUIRE_EQUAL(uint64_t(18630), job.numRecordsHandled()); nonLimitedUsage = limits.resourceMonitor().totalMemory(); - nonLimitedActualUsage = limits.resourceMonitor().actualMemoryUsage(); + nonLimitedMaxSystemUsage = limits.resourceMonitor().maxSystemMemory(); } } LOG_DEBUG(<< "nonLimitedUsage: " << nonLimitedUsage); - LOG_DEBUG(<< "nonLimitedActualUsage: " << nonLimitedActualUsage); - BOOST_TEST_REQUIRE(nonLimitedActualUsage >= nonLimitedUsage); + LOG_DEBUG(<< "nonLimitedMaxSystemUsage: " << nonLimitedMaxSystemUsage); + BOOST_TEST_REQUIRE(nonLimitedMaxSystemUsage >= nonLimitedUsage); { // Now run the data with limiting ml::api::CAnomalyJobConfig jobConfig = CTestAnomalyJob::makeSimpleJobConfig( @@ -171,15 +171,15 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { // TODO this limit must be tightened once there is more granular // control over the model memory creation limitedUsage = limits.resourceMonitor().totalMemory(); - limitedActualUsage = limits.resourceMonitor().actualMemoryUsage(); + limitedMaxSystemUsage = limits.resourceMonitor().maxSystemMemory(); } LOG_TRACE(<< outputStrm.str()); LOG_DEBUG(<< "Non-limited usage: " << nonLimitedUsage << "; limited: " << limitedUsage); - LOG_DEBUG(<< "Non-limited Actual Usage: " << nonLimitedActualUsage); - LOG_DEBUG(<< "Limited Actual Usage: " << limitedActualUsage); + LOG_DEBUG(<< "Non-limited System Usage: " << nonLimitedMaxSystemUsage); + LOG_DEBUG(<< "Limited System Usage: " << limitedMaxSystemUsage); BOOST_TEST_REQUIRE(limitedUsage < nonLimitedUsage); - BOOST_TEST_REQUIRE(limitedActualUsage >= limitedUsage); + BOOST_TEST_REQUIRE(limitedMaxSystemUsage >= limitedUsage); } } @@ -384,7 +384,8 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { LOG_DEBUG(<< "# partition = " << used.s_PartitionFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage bytes = " << used.s_Usage); - LOG_DEBUG(<< "Actual memory usage bytes = " << used.s_ActualMemoryUsage); + LOG_DEBUG(<< "System memory usage bytes = " << used.s_SystemMemoryUsage); + LOG_DEBUG(<< "Max system memory usage bytes = " << used.s_MaxSystemMemoryUsage); LOG_DEBUG(<< "Memory limit bytes = " << memoryLimit * core::constants::BYTES_IN_MEGABYTES); BOOST_TEST_REQUIRE(used.s_ByFields > testParam.s_ExpectedByFields); @@ -394,7 +395,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { memoryLimit * core::constants::BYTES_IN_MEGABYTES / 2, used.s_Usage, memoryLimit * core::constants::BYTES_IN_MEGABYTES / testParam.s_ExpectedByMemoryUsageRelativeErrorDivisor); - BOOST_TEST_REQUIRE(used.s_Usage <= used.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(used.s_Usage <= used.s_MaxSystemMemoryUsage); } LOG_DEBUG(<< "**** Test partition with bucketLength = " << testParam.s_BucketLength @@ -439,7 +440,8 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { LOG_DEBUG(<< "# partition = " << used.s_PartitionFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage = " << used.s_Usage); - LOG_DEBUG(<< "Actual memory usage = " << used.s_ActualMemoryUsage); + LOG_DEBUG(<< "System memory usage = " << used.s_SystemMemoryUsage); + LOG_DEBUG(<< "Max system memory usage = " << used.s_MaxSystemMemoryUsage); LOG_DEBUG(<< "Memory limit bytes = " << memoryLimit * 1024 * 1024); BOOST_TEST_REQUIRE(used.s_PartitionFields >= testParam.s_ExpectedPartitionFields); BOOST_TEST_REQUIRE(used.s_PartitionFields < 450); @@ -449,7 +451,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { memoryLimit * core::constants::BYTES_IN_MEGABYTES / 2, used.s_Usage, memoryLimit * core::constants::BYTES_IN_MEGABYTES / testParam.s_ExpectedPartitionUsageRelativeErrorDivisor); - BOOST_TEST_REQUIRE(used.s_Usage <= used.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(used.s_Usage <= used.s_MaxSystemMemoryUsage); } LOG_DEBUG(<< "**** Test over with bucketLength = " << testParam.s_BucketLength @@ -492,7 +494,8 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { LOG_DEBUG(<< "# over = " << used.s_OverFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage = " << used.s_Usage); - LOG_DEBUG(<< "Actual memory usage = " << used.s_ActualMemoryUsage); + LOG_DEBUG(<< "System memory usage = " << used.s_SystemMemoryUsage); + LOG_DEBUG(<< "Max system memory usage = " << used.s_MaxSystemMemoryUsage); LOG_DEBUG(<< "Memory limit bytes = " << memoryLimit * 1024 * 1024); BOOST_TEST_REQUIRE(used.s_OverFields > testParam.s_ExpectedOverFields); BOOST_TEST_REQUIRE(used.s_OverFields <= 9000); @@ -500,7 +503,7 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { memoryLimit * core::constants::BYTES_IN_MEGABYTES / 2, used.s_Usage, memoryLimit * core::constants::BYTES_IN_MEGABYTES / testParam.s_ExpectedOverUsageRelativeErrorDivisor); - BOOST_TEST_REQUIRE(used.s_Usage <= used.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(used.s_Usage <= used.s_MaxSystemMemoryUsage); } } } diff --git a/lib/api/unittest/CJsonOutputWriterTest.cc b/lib/api/unittest/CJsonOutputWriterTest.cc index 95c5e319b8..82c11c31f2 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.cc +++ b/lib/api/unittest/CJsonOutputWriterTest.cc @@ -1728,22 +1728,23 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { resourceUsage.s_AdjustedUsage = 2; resourceUsage.s_PeakUsage = 3; resourceUsage.s_AdjustedPeakUsage = 4; - resourceUsage.s_ActualMemoryUsage = 5; - resourceUsage.s_ByFields = 6; - resourceUsage.s_PartitionFields = 7; - resourceUsage.s_OverFields = 8; - resourceUsage.s_AllocationFailures = 9; + resourceUsage.s_SystemMemoryUsage = 5; + resourceUsage.s_MaxSystemMemoryUsage = 6; + resourceUsage.s_ByFields = 7; + resourceUsage.s_PartitionFields = 8; + resourceUsage.s_OverFields = 9; + resourceUsage.s_AllocationFailures = 10; resourceUsage.s_MemoryStatus = ml::model_t::E_MemoryStatusHardLimit; - resourceUsage.s_AssignmentMemoryBasis = ml::model_t::E_AssignmentBasisActualMemoryUsageBytes; - resourceUsage.s_BucketStartTime = 10; - resourceUsage.s_BytesExceeded = 11; - resourceUsage.s_BytesMemoryLimit = 12; - resourceUsage.s_OverallCategorizerStats.s_CategorizedMessages = 13; - resourceUsage.s_OverallCategorizerStats.s_TotalCategories = 14; - resourceUsage.s_OverallCategorizerStats.s_FrequentCategories = 15; - resourceUsage.s_OverallCategorizerStats.s_RareCategories = 16; - resourceUsage.s_OverallCategorizerStats.s_DeadCategories = 17; - resourceUsage.s_OverallCategorizerStats.s_MemoryCategorizationFailures = 18; + resourceUsage.s_AssignmentMemoryBasis = ml::model_t::E_AssignmentBasisSystemMemoryBytes; + resourceUsage.s_BucketStartTime = 11; + resourceUsage.s_BytesExceeded = 12; + resourceUsage.s_BytesMemoryLimit = 13; + resourceUsage.s_OverallCategorizerStats.s_CategorizedMessages = 14; + resourceUsage.s_OverallCategorizerStats.s_TotalCategories = 15; + resourceUsage.s_OverallCategorizerStats.s_FrequentCategories = 16; + resourceUsage.s_OverallCategorizerStats.s_RareCategories = 17; + resourceUsage.s_OverallCategorizerStats.s_DeadCategories = 18; + resourceUsage.s_OverallCategorizerStats.s_MemoryCategorizationFailures = 19; resourceUsage.s_OverallCategorizerStats.s_CategorizationStatus = ml::model_t::E_CategorizationStatusWarn; @@ -1771,47 +1772,50 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { BOOST_REQUIRE_EQUAL(2, sizeStats.at("model_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("peak_model_bytes")); BOOST_REQUIRE_EQUAL(4, sizeStats.at("peak_model_bytes").to_number()); - BOOST_TEST_REQUIRE(sizeStats.contains("actual_memory_usage_bytes")); + BOOST_TEST_REQUIRE(sizeStats.contains("system_memory_bytes")); BOOST_REQUIRE_EQUAL( - 5, sizeStats.at("actual_memory_usage_bytes").to_number()); + 5, sizeStats.at("system_memory_bytes").to_number()); + BOOST_TEST_REQUIRE(sizeStats.contains("max_system_memory_bytes")); + BOOST_REQUIRE_EQUAL( + 6, sizeStats.at("max_system_memory_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_by_field_count")); - BOOST_REQUIRE_EQUAL(6, sizeStats.at("total_by_field_count").to_number()); + BOOST_REQUIRE_EQUAL(7, sizeStats.at("total_by_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_partition_field_count")); BOOST_REQUIRE_EQUAL( - 7, sizeStats.at("total_partition_field_count").to_number()); + 8, sizeStats.at("total_partition_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_over_field_count")); - BOOST_REQUIRE_EQUAL(8, sizeStats.at("total_over_field_count").to_number()); + BOOST_REQUIRE_EQUAL(9, sizeStats.at("total_over_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("bucket_allocation_failures_count")); BOOST_REQUIRE_EQUAL( - 9, sizeStats.at("bucket_allocation_failures_count").to_number()); + 10, sizeStats.at("bucket_allocation_failures_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("timestamp")); - BOOST_REQUIRE_EQUAL(10000, sizeStats.at("timestamp").to_number()); + BOOST_REQUIRE_EQUAL(11000, sizeStats.at("timestamp").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("memory_status")); BOOST_REQUIRE_EQUAL("hard_limit", sizeStats.at("memory_status").as_string()); BOOST_TEST_REQUIRE(sizeStats.contains("assignment_memory_basis")); - BOOST_REQUIRE_EQUAL("actual_memory_usage_bytes", + BOOST_REQUIRE_EQUAL("system_memory_bytes", sizeStats.at("assignment_memory_basis").as_string()); BOOST_TEST_REQUIRE(sizeStats.contains("log_time")); std::int64_t nowMs{ml::core::CTimeUtils::nowMs()}; BOOST_TEST_REQUIRE(nowMs >= sizeStats.at("log_time").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("model_bytes_exceeded")); - BOOST_REQUIRE_EQUAL(11, sizeStats.at("model_bytes_exceeded").to_number()); + BOOST_REQUIRE_EQUAL(12, sizeStats.at("model_bytes_exceeded").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("model_bytes_memory_limit")); BOOST_REQUIRE_EQUAL( - 12, sizeStats.at("model_bytes_memory_limit").to_number()); + 13, sizeStats.at("model_bytes_memory_limit").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("categorized_doc_count")); - BOOST_REQUIRE_EQUAL(13, sizeStats.at("categorized_doc_count").to_number()); + BOOST_REQUIRE_EQUAL(14, sizeStats.at("categorized_doc_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_category_count")); - BOOST_REQUIRE_EQUAL(14, sizeStats.at("total_category_count").to_number()); + BOOST_REQUIRE_EQUAL(15, sizeStats.at("total_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("frequent_category_count")); BOOST_REQUIRE_EQUAL( - 15, sizeStats.at("frequent_category_count").to_number()); + 16, sizeStats.at("frequent_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("rare_category_count")); - BOOST_REQUIRE_EQUAL(16, sizeStats.at("rare_category_count").to_number()); + BOOST_REQUIRE_EQUAL(17, sizeStats.at("rare_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("dead_category_count")); - BOOST_REQUIRE_EQUAL(17, sizeStats.at("dead_category_count").to_number()); + BOOST_REQUIRE_EQUAL(18, sizeStats.at("dead_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("failed_category_count")); - BOOST_REQUIRE_EQUAL(18, sizeStats.at("failed_category_count").to_number()); + BOOST_REQUIRE_EQUAL(19, sizeStats.at("failed_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("categorization_status")); BOOST_REQUIRE_EQUAL("warn", sizeStats.at("categorization_status").as_string()); } diff --git a/lib/api/unittest/CModelSnapshotJsonWriterTest.cc b/lib/api/unittest/CModelSnapshotJsonWriterTest.cc index eb4d382b9a..c3c32aa022 100644 --- a/lib/api/unittest/CModelSnapshotJsonWriterTest.cc +++ b/lib/api/unittest/CModelSnapshotJsonWriterTest.cc @@ -36,7 +36,8 @@ BOOST_AUTO_TEST_CASE(testWrite) { 20000, // bytes used (adjusted) 30000, // peak bytes used 60000, // peak bytes used (adjusted) - 409600, // Actual memory used (max rss) + 409600, // System memory used (rss) + 413696, // Max system memory used (max rss) 3, // # by fields 1, // # partition fields 150, // # over fields @@ -117,6 +118,10 @@ BOOST_AUTO_TEST_CASE(testWrite) { BOOST_TEST_REQUIRE(modelSizeStats.contains("peak_model_bytes")); BOOST_REQUIRE_EQUAL( 60000, modelSizeStats.at("peak_model_bytes").to_number()); + BOOST_REQUIRE_EQUAL( + 409600, modelSizeStats.at("system_memory_bytes").to_number()); + BOOST_REQUIRE_EQUAL( + 413696, modelSizeStats.at("max_system_memory_bytes").to_number()); BOOST_TEST_REQUIRE(modelSizeStats.contains("total_by_field_count")); BOOST_REQUIRE_EQUAL( 3, modelSizeStats.at("total_by_field_count").to_number()); diff --git a/lib/core/CProcessStats_Linux.cc b/lib/core/CProcessStats_Linux.cc index c858b4e589..6511a209f1 100644 --- a/lib/core/CProcessStats_Linux.cc +++ b/lib/core/CProcessStats_Linux.cc @@ -76,6 +76,8 @@ std::size_t CProcessStats::residentSetSize() { } } + CProgramCounters::counter(counter_t::E_TSADResidentSetSize) = rss; + return rss; } diff --git a/lib/core/CProcessStats_MacOSX.cc b/lib/core/CProcessStats_MacOSX.cc index c3edc85424..855b0bdca9 100644 --- a/lib/core/CProcessStats_MacOSX.cc +++ b/lib/core/CProcessStats_MacOSX.cc @@ -23,6 +23,7 @@ namespace core { std::size_t CProcessStats::residentSetSize() { // not supported on osx + CProgramCounters::counter(counter_t::E_TSADResidentSetSize) = 0; return 0; } diff --git a/lib/core/CProcessStats_Windows.cc b/lib/core/CProcessStats_Windows.cc index d91db5e3c9..e4e2baa163 100644 --- a/lib/core/CProcessStats_Windows.cc +++ b/lib/core/CProcessStats_Windows.cc @@ -29,7 +29,11 @@ std::size_t CProcessStats::residentSetSize() { return 0; } - return static_cast(stats.WorkingSetSize); + auto workingSetSize = static_cast(stats.WorkingSetSize); + + CProgramCounters::counter(counter_t::E_TSADResidentSetSize) = workingSetSize; + + return workingSetSize; } std::size_t CProcessStats::maxResidentSetSize() { diff --git a/lib/model/CResourceMonitor.cc b/lib/model/CResourceMonitor.cc index a0b74ed6f3..69354f5629 100644 --- a/lib/model/CResourceMonitor.cc +++ b/lib/model/CResourceMonitor.cc @@ -383,7 +383,8 @@ CResourceMonitor::createMemoryUsageReport(core_t::TTime bucketStartTime) { res.s_PeakUsage = static_cast( core::CProgramCounters::counter(counter_t::E_TSADPeakMemoryUsage)); res.s_AdjustedPeakUsage = this->adjustedUsage(res.s_PeakUsage); - res.s_ActualMemoryUsage = core::CProcessStats::maxResidentSetSize(); + res.s_SystemMemoryUsage = core::CProcessStats::residentSetSize(); + res.s_MaxSystemMemoryUsage = core::CProcessStats::maxResidentSetSize(); res.s_BytesMemoryLimit = this->persistenceMemoryIncreaseFactor() * m_ByteLimitHigh; res.s_BytesExceeded = m_CurrentBytesExceeded; res.s_MemoryStatus = m_MemoryStatus; @@ -493,9 +494,12 @@ std::size_t CResourceMonitor::totalMemory() const { counter_t::E_TSADOutputMemoryAllocatorUsage)); } -std::size_t CResourceMonitor::actualMemoryUsage() const { - return core::CProcessStats::maxResidentSetSize(); +std::size_t CResourceMonitor::systemMemory() const { + return core::CProcessStats::residentSetSize(); } +std::size_t CResourceMonitor::maxSystemMemory() const { + return core::CProcessStats::maxResidentSetSize(); +} } // model } // ml diff --git a/lib/model/ModelTypes.cc b/lib/model/ModelTypes.cc index a9140a66b3..1bddf8bae8 100644 --- a/lib/model/ModelTypes.cc +++ b/lib/model/ModelTypes.cc @@ -1733,8 +1733,10 @@ std::string print(EAssignmentMemoryBasis assignmentMemoryBasis) { return "current_model_bytes"; case E_AssignmentBasisPeakModelBytes: return "peak_model_bytes"; - case E_AssignmentBasisActualMemoryUsageBytes: - return "actual_memory_usage_bytes"; + case E_AssignmentBasisSystemMemoryBytes: + return "system_memory_bytes"; + case E_AssignmentBasisMaxSystemMemoryBytes: + return "max_system_memory_bytes"; } return "-"; } diff --git a/lib/model/unittest/CResourceMonitorTest.cc b/lib/model/unittest/CResourceMonitorTest.cc index 193746addc..c17c644c80 100644 --- a/lib/model/unittest/CResourceMonitorTest.cc +++ b/lib/model/unittest/CResourceMonitorTest.cc @@ -549,9 +549,9 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory, m_ReportedModelSizeStats.s_PeakUsage); - BOOST_TEST_REQUIRE(baseTotalMemory <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(baseTotalMemory <= m_ReportedModelSizeStats.s_SystemMemoryUsage); BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= - m_ReportedModelSizeStats.s_ActualMemoryUsage); + m_ReportedModelSizeStats.s_SystemMemoryUsage); monitor.addExtraMemory(100); @@ -560,9 +560,9 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_PeakUsage); - BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= - m_ReportedModelSizeStats.s_ActualMemoryUsage); + m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); monitor.addExtraMemory(-50); @@ -571,9 +571,9 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 50, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_PeakUsage); - BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= - m_ReportedModelSizeStats.s_ActualMemoryUsage); + m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); monitor.addExtraMemory(100); @@ -582,9 +582,9 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 150, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory + 150, m_ReportedModelSizeStats.s_PeakUsage); - BOOST_TEST_REQUIRE(baseTotalMemory + 150 <= m_ReportedModelSizeStats.s_ActualMemoryUsage); + BOOST_TEST_REQUIRE(baseTotalMemory + 150 <= m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= - m_ReportedModelSizeStats.s_ActualMemoryUsage); + m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); } BOOST_FIXTURE_TEST_CASE(testUpdateMoments, CTestFixture) { From 5ae22cb50c89f8176df8eca696684d294f16cf69 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 11 Apr 2025 11:08:27 +1200 Subject: [PATCH 07/49] Update bin/autodetect/Main.cc Co-authored-by: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> --- bin/autodetect/Main.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/autodetect/Main.cc b/bin/autodetect/Main.cc index 11f7674634..93cb77e5d7 100644 --- a/bin/autodetect/Main.cc +++ b/bin/autodetect/Main.cc @@ -155,7 +155,7 @@ int main(int argc, char** argv) { cancellerThread.stop(); LOG_DEBUG(<< "Max Resident Set Size: " << ml::core::CProcessStats::maxResidentSetSize()); - +LOG_DEBUG(<< "Resident Set Size: " << ml::core::CProcessStats::residentSetSize()); // Log the program version immediately after reconfiguring the logger. This // must be done from the program, and NOT a shared library, as each program // statically links its own version library. From fe6f1fac7df9daf4b6a7a8f42c0ea3ce0f668a32 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 11 Apr 2025 11:08:49 +1200 Subject: [PATCH 08/49] Update include/model/CResourceMonitor.h Co-authored-by: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> --- include/model/CResourceMonitor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index 8cb9d5f3ac..d1815efa81 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -182,7 +182,7 @@ class MODEL_EXPORT CResourceMonitor { //! Returns the sum of used memory plus any extra memory std::size_t totalMemory() const; - //! Returns the current physical memory of the process as reported by the system + //! Returns the current physical memory of the process (rss) as reported by the system std::size_t systemMemory() const; //! Returns the maximum physical memory of the processs as reported by the system From 582430ea4fbfbd0d5cf486af35fc4f469e0c50d0 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 11 Apr 2025 11:09:09 +1200 Subject: [PATCH 09/49] Update include/model/CResourceMonitor.h Co-authored-by: Valeriy Khakhutskyy <1292899+valeriy42@users.noreply.github.com> --- include/model/CResourceMonitor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index d1815efa81..ff156d007d 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -185,7 +185,7 @@ class MODEL_EXPORT CResourceMonitor { //! Returns the current physical memory of the process (rss) as reported by the system std::size_t systemMemory() const; - //! Returns the maximum physical memory of the processs as reported by the system + //! Returns the maximum physical memory of the process (max rss) as reported by the system std::size_t maxSystemMemory() const; private: From 9476edee78422a088b9c94d104680203d1c21bf2 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 11 Apr 2025 14:59:43 +1200 Subject: [PATCH 10/49] Attend to review comments --- bin/autodetect/Main.cc | 6 +++--- include/core/CProgramCounters.h | 8 ++++---- include/model/CResourceMonitor.h | 2 +- include/model/ModelTypes.h | 4 +--- lib/api/unittest/CJsonOutputWriterTest.cc | 6 +++--- lib/core/CProcessStats_Linux.cc | 4 ++-- lib/core/CProcessStats_MacOSX.cc | 4 ++-- lib/core/CProcessStats_Windows.cc | 4 ++-- lib/model/ModelTypes.cc | 4 ---- 9 files changed, 18 insertions(+), 24 deletions(-) diff --git a/bin/autodetect/Main.cc b/bin/autodetect/Main.cc index 11f7674634..6037d4ba43 100644 --- a/bin/autodetect/Main.cc +++ b/bin/autodetect/Main.cc @@ -85,8 +85,8 @@ int main(int argc, char** argv) { ml::counter_t::E_TSADNumberPrunedItems, ml::counter_t::E_TSADAssignmentMemoryBasis, ml::counter_t::E_TSADOutputMemoryAllocatorUsage, - ml::counter_t::E_TSADResidentSetSize, - ml::counter_t::E_TSADMaxResidentSetSize}; + ml::counter_t::E_TSADSystemMemoryUsage, + ml::counter_t::E_TSADMaxSystemMemoryUsage}; ml::core::CProgramCounters::registerProgramCounterTypes(counters); @@ -154,7 +154,7 @@ int main(int argc, char** argv) { } cancellerThread.stop(); - LOG_DEBUG(<< "Max Resident Set Size: " << ml::core::CProcessStats::maxResidentSetSize()); + LOG_DEBUG(<< "Resident Set Size: " << ml::core::CProcessStats::residentSetSize()); // Log the program version immediately after reconfiguring the logger. This // must be done from the program, and NOT a shared library, as each program diff --git a/include/core/CProgramCounters.h b/include/core/CProgramCounters.h index 9bad82389a..fd9c949a5d 100644 --- a/include/core/CProgramCounters.h +++ b/include/core/CProgramCounters.h @@ -113,10 +113,10 @@ enum ECounterTypes { E_TSADOutputMemoryAllocatorUsage = 30, //! The resident set size of the process, in bytes. - E_TSADResidentSetSize = 31, + E_TSADSystemMemoryUsage = 31, //! The maximum resident set size of the process, in bytes. - E_TSADMaxResidentSetSize = 32, + E_TSADMaxSystemMemoryUsage = 32, // Data Frame Outlier Detection @@ -361,9 +361,9 @@ class CORE_EXPORT CProgramCounters { "Which option is being used to get model memory for node assignment?"}, {counter_t::E_TSADOutputMemoryAllocatorUsage, "E_TSADOutputMemoryAllocatorUsage", "The amount of memory used to output JSON documents, in bytes."}, - {counter_t::E_TSADResidentSetSize, "E_TSADResidentSetSize", + {counter_t::E_TSADSystemMemoryUsage, "E_TSADResidentSetSize", "The resident set size of the process, in bytes"}, - {counter_t::E_TSADMaxResidentSetSize, "E_TSADMaxResidentSetSize", + {counter_t::E_TSADMaxSystemMemoryUsage, "E_TSADMaxResidentSetSize", "The maximum resident set size of the process, in bytes"}, {counter_t::E_DFOEstimatedPeakMemoryUsage, "E_DFOEstimatedPeakMemoryUsage", "The upfront estimate of the peak memory outlier detection would use"}, diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index 8cb9d5f3ac..cd8ea1ad58 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -182,7 +182,7 @@ class MODEL_EXPORT CResourceMonitor { //! Returns the sum of used memory plus any extra memory std::size_t totalMemory() const; - //! Returns the current physical memory of the process as reported by the system + //! Returns the current physical memory (rss) of the process as reported by the system std::size_t systemMemory() const; //! Returns the maximum physical memory of the processs as reported by the system diff --git a/include/model/ModelTypes.h b/include/model/ModelTypes.h index 66674ed39d..9494935b92 100644 --- a/include/model/ModelTypes.h +++ b/include/model/ModelTypes.h @@ -719,9 +719,7 @@ enum EAssignmentMemoryBasis { E_AssignmentBasisUnknown = 0, //!< Decision made in Java code E_AssignmentBasisModelMemoryLimit = 1, //!< Use model memory limit E_AssignmentBasisCurrentModelBytes = 2, //!< Use current actual model size - E_AssignmentBasisPeakModelBytes = 3, //!< Use highest ever actual model size - E_AssignmentBasisSystemMemoryBytes = 4, //!< Use the current system memory size - E_AssignmentBasisMaxSystemMemoryBytes = 5 //!< Use the highest ever system memory size + E_AssignmentBasisPeakModelBytes = 3 }; //! Get a string description of \p assignmentMemoryBasis. diff --git a/lib/api/unittest/CJsonOutputWriterTest.cc b/lib/api/unittest/CJsonOutputWriterTest.cc index 82c11c31f2..495fd77a52 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.cc +++ b/lib/api/unittest/CJsonOutputWriterTest.cc @@ -1717,7 +1717,7 @@ BOOST_AUTO_TEST_CASE(testPersistNormalizer) { BOOST_TEST_REQUIRE(quantileState.contains("timestamp")); } -BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { +BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { std::ostringstream sstream; { ml::core::CJsonOutputStreamWrapper outputStream(sstream); @@ -1735,7 +1735,7 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { resourceUsage.s_OverFields = 9; resourceUsage.s_AllocationFailures = 10; resourceUsage.s_MemoryStatus = ml::model_t::E_MemoryStatusHardLimit; - resourceUsage.s_AssignmentMemoryBasis = ml::model_t::E_AssignmentBasisSystemMemoryBytes; + resourceUsage.s_AssignmentMemoryBasis = ml::model_t::E_AssignmentBasisPeakModelBytes; resourceUsage.s_BucketStartTime = 11; resourceUsage.s_BytesExceeded = 12; resourceUsage.s_BytesMemoryLimit = 13; @@ -1793,7 +1793,7 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { BOOST_TEST_REQUIRE(sizeStats.contains("memory_status")); BOOST_REQUIRE_EQUAL("hard_limit", sizeStats.at("memory_status").as_string()); BOOST_TEST_REQUIRE(sizeStats.contains("assignment_memory_basis")); - BOOST_REQUIRE_EQUAL("system_memory_bytes", + BOOST_REQUIRE_EQUAL("peak_model_bytes", sizeStats.at("assignment_memory_basis").as_string()); BOOST_TEST_REQUIRE(sizeStats.contains("log_time")); std::int64_t nowMs{ml::core::CTimeUtils::nowMs()}; diff --git a/lib/core/CProcessStats_Linux.cc b/lib/core/CProcessStats_Linux.cc index 6511a209f1..ecaaf7f272 100644 --- a/lib/core/CProcessStats_Linux.cc +++ b/lib/core/CProcessStats_Linux.cc @@ -76,7 +76,7 @@ std::size_t CProcessStats::residentSetSize() { } } - CProgramCounters::counter(counter_t::E_TSADResidentSetSize) = rss; + CProgramCounters::counter(counter_t::E_TSADSystemMemoryUsage) = rss; return rss; } @@ -92,7 +92,7 @@ std::size_t CProcessStats::maxResidentSetSize() { // ru_maxrss is in kilobytes auto maxRSS = static_cast(rusage.ru_maxrss * 1024L); - CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = maxRSS; + CProgramCounters::counter(counter_t::E_TSADMaxSystemMemoryUsage) = maxRSS; return maxRSS; } diff --git a/lib/core/CProcessStats_MacOSX.cc b/lib/core/CProcessStats_MacOSX.cc index 855b0bdca9..d308463c4e 100644 --- a/lib/core/CProcessStats_MacOSX.cc +++ b/lib/core/CProcessStats_MacOSX.cc @@ -23,7 +23,7 @@ namespace core { std::size_t CProcessStats::residentSetSize() { // not supported on osx - CProgramCounters::counter(counter_t::E_TSADResidentSetSize) = 0; + CProgramCounters::counter(counter_t::E_TSADSystemMemoryUsage) = 0; return 0; } @@ -35,7 +35,7 @@ std::size_t CProcessStats::maxResidentSetSize() { return 0; } auto maxRSS = static_cast(rusage.ru_maxrss); - CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = maxRSS; + CProgramCounters::counter(counter_t::E_TSADMaxSystemMemoryUsage) = maxRSS; // ru_maxrss is in bytes return maxRSS; } diff --git a/lib/core/CProcessStats_Windows.cc b/lib/core/CProcessStats_Windows.cc index e4e2baa163..cc88daad75 100644 --- a/lib/core/CProcessStats_Windows.cc +++ b/lib/core/CProcessStats_Windows.cc @@ -31,7 +31,7 @@ std::size_t CProcessStats::residentSetSize() { auto workingSetSize = static_cast(stats.WorkingSetSize); - CProgramCounters::counter(counter_t::E_TSADResidentSetSize) = workingSetSize; + CProgramCounters::counter(counter_t::E_TSADSystemMemoryUsage) = workingSetSize; return workingSetSize; } @@ -45,7 +45,7 @@ std::size_t CProcessStats::maxResidentSetSize() { auto peakWorkingSetSize = static_cast(stats.PeakWorkingSetSize); - CProgramCounters::counter(counter_t::E_TSADMaxResidentSetSize) = peakWorkingSetSize; + CProgramCounters::counter(counter_t::E_TSADMaxSystemMemoryUsage) = peakWorkingSetSize; return peakWorkingSetSize; } diff --git a/lib/model/ModelTypes.cc b/lib/model/ModelTypes.cc index 1bddf8bae8..2fab1d1c2a 100644 --- a/lib/model/ModelTypes.cc +++ b/lib/model/ModelTypes.cc @@ -1733,10 +1733,6 @@ std::string print(EAssignmentMemoryBasis assignmentMemoryBasis) { return "current_model_bytes"; case E_AssignmentBasisPeakModelBytes: return "peak_model_bytes"; - case E_AssignmentBasisSystemMemoryBytes: - return "system_memory_bytes"; - case E_AssignmentBasisMaxSystemMemoryBytes: - return "max_system_memory_bytes"; } return "-"; } From 475fef1aec8111be815bae561bdc1b82d7bb37fb Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 11 Apr 2025 15:12:47 +1200 Subject: [PATCH 11/49] Formatting --- lib/api/unittest/CJsonOutputWriterTest.cc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/api/unittest/CJsonOutputWriterTest.cc b/lib/api/unittest/CJsonOutputWriterTest.cc index 495fd77a52..ea16f9a19d 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.cc +++ b/lib/api/unittest/CJsonOutputWriterTest.cc @@ -1717,7 +1717,7 @@ BOOST_AUTO_TEST_CASE(testPersistNormalizer) { BOOST_TEST_REQUIRE(quantileState.contains("timestamp")); } -BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { +BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { std::ostringstream sstream; { ml::core::CJsonOutputStreamWrapper outputStream(sstream); @@ -1773,8 +1773,7 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { BOOST_TEST_REQUIRE(sizeStats.contains("peak_model_bytes")); BOOST_REQUIRE_EQUAL(4, sizeStats.at("peak_model_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("system_memory_bytes")); - BOOST_REQUIRE_EQUAL( - 5, sizeStats.at("system_memory_bytes").to_number()); + BOOST_REQUIRE_EQUAL(5, sizeStats.at("system_memory_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("max_system_memory_bytes")); BOOST_REQUIRE_EQUAL( 6, sizeStats.at("max_system_memory_bytes").to_number()); From 6945d3fd1d439581526320677a5009d8425c5add Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Tue, 27 May 2025 15:50:37 +1200 Subject: [PATCH 12/49] Attend to failing unit tests (hopefully) * Address failing unit tests * More accurate, meaningful description of new program counters --- include/core/CProgramCounters.h | 8 ++++---- include/model/CResourceMonitor.h | 4 ++-- include/model/ModelTypes.h | 2 +- lib/api/CAnomalyJob.cc | 2 ++ lib/core/CProcessStats_Linux.cc | 8 +------- lib/core/CProcessStats_MacOSX.cc | 6 ++---- lib/core/CProcessStats_Windows.cc | 12 ++---------- lib/model/CResourceMonitor.cc | 4 ++-- 8 files changed, 16 insertions(+), 30 deletions(-) diff --git a/include/core/CProgramCounters.h b/include/core/CProgramCounters.h index fd9c949a5d..4fde6670ff 100644 --- a/include/core/CProgramCounters.h +++ b/include/core/CProgramCounters.h @@ -361,10 +361,10 @@ class CORE_EXPORT CProgramCounters { "Which option is being used to get model memory for node assignment?"}, {counter_t::E_TSADOutputMemoryAllocatorUsage, "E_TSADOutputMemoryAllocatorUsage", "The amount of memory used to output JSON documents, in bytes."}, - {counter_t::E_TSADSystemMemoryUsage, "E_TSADResidentSetSize", - "The resident set size of the process, in bytes"}, - {counter_t::E_TSADMaxSystemMemoryUsage, "E_TSADMaxResidentSetSize", - "The maximum resident set size of the process, in bytes"}, + {counter_t::E_TSADSystemMemoryUsage, "E_TSADSystemMemoryUsage", + "The amount of system memory used by the process, in bytes"}, + {counter_t::E_TSADMaxSystemMemoryUsage, "E_TSADMaxSystemMemoryUsage", + "The maximum amount of system memory used by the process, in bytes"}, {counter_t::E_DFOEstimatedPeakMemoryUsage, "E_DFOEstimatedPeakMemoryUsage", "The upfront estimate of the peak memory outlier detection would use"}, {counter_t::E_DFOPeakMemoryUsage, "E_DFOPeakMemoryUsage", "The peak memory outlier detection used"}, diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index ff156d007d..cea75148ed 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -183,10 +183,10 @@ class MODEL_EXPORT CResourceMonitor { std::size_t totalMemory() const; //! Returns the current physical memory of the process (rss) as reported by the system - std::size_t systemMemory() const; + static std::size_t systemMemory() ; //! Returns the maximum physical memory of the process (max rss) as reported by the system - std::size_t maxSystemMemory() const; + static std::size_t maxSystemMemory(); private: using TMonitoredResourcePtrSizeUMap = diff --git a/include/model/ModelTypes.h b/include/model/ModelTypes.h index 9494935b92..acbcc14c04 100644 --- a/include/model/ModelTypes.h +++ b/include/model/ModelTypes.h @@ -719,7 +719,7 @@ enum EAssignmentMemoryBasis { E_AssignmentBasisUnknown = 0, //!< Decision made in Java code E_AssignmentBasisModelMemoryLimit = 1, //!< Use model memory limit E_AssignmentBasisCurrentModelBytes = 2, //!< Use current actual model size - E_AssignmentBasisPeakModelBytes = 3 + E_AssignmentBasisPeakModelBytes = 3 //!< Use highest ever actual model size }; //! Get a string description of \p assignmentMemoryBasis. diff --git a/lib/api/CAnomalyJob.cc b/lib/api/CAnomalyJob.cc index 3a8c06be2a..cc590d370b 100644 --- a/lib/api/CAnomalyJob.cc +++ b/lib/api/CAnomalyJob.cc @@ -205,6 +205,8 @@ bool CAnomalyJob::handleRecord(const TStrStrUMap& dataRowFields, TOptionalTime t } ++core::CProgramCounters::counter(counter_t::E_TSADNumberApiRecordsHandled); + core::CProgramCounters::counter(counter_t::E_TSADSystemMemoryUsage) = model::CResourceMonitor::systemMemory(); + core::CProgramCounters::counter(counter_t::E_TSADMaxSystemMemoryUsage) = model::CResourceMonitor::maxSystemMemory(); ++m_NumRecordsHandled; m_LatestRecordTime = std::max(m_LatestRecordTime, *time); diff --git a/lib/core/CProcessStats_Linux.cc b/lib/core/CProcessStats_Linux.cc index ecaaf7f272..e3635e279f 100644 --- a/lib/core/CProcessStats_Linux.cc +++ b/lib/core/CProcessStats_Linux.cc @@ -76,8 +76,6 @@ std::size_t CProcessStats::residentSetSize() { } } - CProgramCounters::counter(counter_t::E_TSADSystemMemoryUsage) = rss; - return rss; } @@ -90,11 +88,7 @@ std::size_t CProcessStats::maxResidentSetSize() { } // ru_maxrss is in kilobytes - auto maxRSS = static_cast(rusage.ru_maxrss * 1024L); - - CProgramCounters::counter(counter_t::E_TSADMaxSystemMemoryUsage) = maxRSS; - - return maxRSS; + return static_cast(rusage.ru_maxrss * 1024L); } } } diff --git a/lib/core/CProcessStats_MacOSX.cc b/lib/core/CProcessStats_MacOSX.cc index d308463c4e..ce86647aed 100644 --- a/lib/core/CProcessStats_MacOSX.cc +++ b/lib/core/CProcessStats_MacOSX.cc @@ -23,7 +23,6 @@ namespace core { std::size_t CProcessStats::residentSetSize() { // not supported on osx - CProgramCounters::counter(counter_t::E_TSADSystemMemoryUsage) = 0; return 0; } @@ -34,10 +33,9 @@ std::size_t CProcessStats::maxResidentSetSize() { LOG_DEBUG(<< "failed to get resource usage(getrusage): " << ::strerror(errno)); return 0; } - auto maxRSS = static_cast(rusage.ru_maxrss); - CProgramCounters::counter(counter_t::E_TSADMaxSystemMemoryUsage) = maxRSS; + // ru_maxrss is in bytes - return maxRSS; + return static_cast(rusage.ru_maxrss);; } } } diff --git a/lib/core/CProcessStats_Windows.cc b/lib/core/CProcessStats_Windows.cc index cc88daad75..933952e1f2 100644 --- a/lib/core/CProcessStats_Windows.cc +++ b/lib/core/CProcessStats_Windows.cc @@ -29,11 +29,7 @@ std::size_t CProcessStats::residentSetSize() { return 0; } - auto workingSetSize = static_cast(stats.WorkingSetSize); - - CProgramCounters::counter(counter_t::E_TSADSystemMemoryUsage) = workingSetSize; - - return workingSetSize; + return static_cast(stats.WorkingSetSize); } std::size_t CProcessStats::maxResidentSetSize() { @@ -43,11 +39,7 @@ std::size_t CProcessStats::maxResidentSetSize() { return 0; } - auto peakWorkingSetSize = static_cast(stats.PeakWorkingSetSize); - - CProgramCounters::counter(counter_t::E_TSADMaxSystemMemoryUsage) = peakWorkingSetSize; - - return peakWorkingSetSize; + return static_cast(stats.PeakWorkingSetSize); } } } diff --git a/lib/model/CResourceMonitor.cc b/lib/model/CResourceMonitor.cc index 69354f5629..7d71df1afb 100644 --- a/lib/model/CResourceMonitor.cc +++ b/lib/model/CResourceMonitor.cc @@ -494,11 +494,11 @@ std::size_t CResourceMonitor::totalMemory() const { counter_t::E_TSADOutputMemoryAllocatorUsage)); } -std::size_t CResourceMonitor::systemMemory() const { +std::size_t CResourceMonitor::systemMemory() { return core::CProcessStats::residentSetSize(); } -std::size_t CResourceMonitor::maxSystemMemory() const { +std::size_t CResourceMonitor::maxSystemMemory() { return core::CProcessStats::maxResidentSetSize(); } } // model From 3b69b72f8227388ed654ee0de9d381adec4adf88 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Tue, 27 May 2025 16:11:25 +1200 Subject: [PATCH 13/49] Formatting.. grr --- include/model/CResourceMonitor.h | 2 +- lib/api/CAnomalyJob.cc | 6 ++++-- lib/core/CProcessStats_MacOSX.cc | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index cea75148ed..68758d4f09 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -183,7 +183,7 @@ class MODEL_EXPORT CResourceMonitor { std::size_t totalMemory() const; //! Returns the current physical memory of the process (rss) as reported by the system - static std::size_t systemMemory() ; + static std::size_t systemMemory(); //! Returns the maximum physical memory of the process (max rss) as reported by the system static std::size_t maxSystemMemory(); diff --git a/lib/api/CAnomalyJob.cc b/lib/api/CAnomalyJob.cc index cc590d370b..a334fe34dc 100644 --- a/lib/api/CAnomalyJob.cc +++ b/lib/api/CAnomalyJob.cc @@ -205,8 +205,10 @@ bool CAnomalyJob::handleRecord(const TStrStrUMap& dataRowFields, TOptionalTime t } ++core::CProgramCounters::counter(counter_t::E_TSADNumberApiRecordsHandled); - core::CProgramCounters::counter(counter_t::E_TSADSystemMemoryUsage) = model::CResourceMonitor::systemMemory(); - core::CProgramCounters::counter(counter_t::E_TSADMaxSystemMemoryUsage) = model::CResourceMonitor::maxSystemMemory(); + core::CProgramCounters::counter(counter_t::E_TSADSystemMemoryUsage) = + model::CResourceMonitor::systemMemory(); + core::CProgramCounters::counter(counter_t::E_TSADMaxSystemMemoryUsage) = + model::CResourceMonitor::maxSystemMemory(); ++m_NumRecordsHandled; m_LatestRecordTime = std::max(m_LatestRecordTime, *time); diff --git a/lib/core/CProcessStats_MacOSX.cc b/lib/core/CProcessStats_MacOSX.cc index ce86647aed..61dc9fca66 100644 --- a/lib/core/CProcessStats_MacOSX.cc +++ b/lib/core/CProcessStats_MacOSX.cc @@ -35,7 +35,7 @@ std::size_t CProcessStats::maxResidentSetSize() { } // ru_maxrss is in bytes - return static_cast(rusage.ru_maxrss);; + return static_cast(rusage.ru_maxrss); } } } From 1bafa8cdf1663636f7bb59ffbd2f31488e90835b Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Wed, 4 Jun 2025 16:24:50 +1200 Subject: [PATCH 14/49] On Linux only, use the value of the system memory usage (max resident set size) for the "model memory usage" and "peak model memory usage" fields reported to Java. --- include/model/CSystemMemoryUsage.h | 43 +++++++++++++++++++ lib/api/CModelSizeStatsJsonWriter.cc | 6 --- lib/api/unittest/CJsonOutputWriterTest.cc | 5 --- .../unittest/CModelSnapshotJsonWriterTest.cc | 4 -- lib/model/CMakeLists.txt | 1 + lib/model/CResourceMonitor.cc | 6 ++- lib/model/CSystemMemoryUsage.cc | 23 ++++++++++ lib/model/CSystemMemoryUsage_Linux.cc | 24 +++++++++++ 8 files changed, 95 insertions(+), 17 deletions(-) create mode 100644 include/model/CSystemMemoryUsage.h create mode 100644 lib/model/CSystemMemoryUsage.cc create mode 100644 lib/model/CSystemMemoryUsage_Linux.cc diff --git a/include/model/CSystemMemoryUsage.h b/include/model/CSystemMemoryUsage.h new file mode 100644 index 0000000000..62adea2f29 --- /dev/null +++ b/include/model/CSystemMemoryUsage.h @@ -0,0 +1,43 @@ +/* +* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the following additional limitation. Functionality enabled by the + * files subject to the Elastic License 2.0 may only be used in production when + * invoked by an Elasticsearch process with a license key installed that permits + * use of machine learning features. You may not use this file except in + * compliance with the Elastic License 2.0 and the foregoing additional + * limitation. + */ +#ifndef INCLUDED_ml_model_CSystemMemoryUsage_h +#define INCLUDED_ml_model_CSystemMemoryUsage_h + +#include + +#include + +namespace ml { +namespace model { + +//! \brief Determines the memory used by the current process. +//! +//! DESCRIPTION:\n +//! Determines the memory used by the current process based on the operating system. +class MODEL_EXPORT CSystemMemoryUsage { +public: + CSystemMemoryUsage() = default; + ~CSystemMemoryUsage() = default; + + CSystemMemoryUsage(const CSystemMemoryUsage&) = delete; + CSystemMemoryUsage(CSystemMemoryUsage&&) = delete; + CSystemMemoryUsage& operator=(const CSystemMemoryUsage&) = delete; + CSystemMemoryUsage& operator=(CSystemMemoryUsage&&) = delete; + + //! Return the system memory used by the current process. + //! \param memSize An estimate of the process memory + //! \return The system memory usage based on the current OS. + std::size_t operator()(std::size_t memSize); +}; +} +} + +#endif //INCLUDED_ml_model_CSystemMemoryUsage_h diff --git a/lib/api/CModelSizeStatsJsonWriter.cc b/lib/api/CModelSizeStatsJsonWriter.cc index 12858914af..68b5b80c88 100644 --- a/lib/api/CModelSizeStatsJsonWriter.cc +++ b/lib/api/CModelSizeStatsJsonWriter.cc @@ -62,12 +62,6 @@ void CModelSizeStatsJsonWriter::write(const std::string& jobId, writer.onKey(PEAK_MODEL_BYTES); writer.onUint64(results.s_AdjustedPeakUsage); - writer.onKey(SYSTEM_MEMORY_BYTES); - writer.onUint64(results.s_SystemMemoryUsage); - - writer.onKey(MAX_SYSTEM_MEMORY_BYTES); - writer.onUint64(results.s_MaxSystemMemoryUsage); - writer.onKey(MODEL_BYTES_EXCEEDED); writer.onUint64(results.s_BytesExceeded); diff --git a/lib/api/unittest/CJsonOutputWriterTest.cc b/lib/api/unittest/CJsonOutputWriterTest.cc index ea16f9a19d..19caa4dfc1 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.cc +++ b/lib/api/unittest/CJsonOutputWriterTest.cc @@ -1772,11 +1772,6 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { BOOST_REQUIRE_EQUAL(2, sizeStats.at("model_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("peak_model_bytes")); BOOST_REQUIRE_EQUAL(4, sizeStats.at("peak_model_bytes").to_number()); - BOOST_TEST_REQUIRE(sizeStats.contains("system_memory_bytes")); - BOOST_REQUIRE_EQUAL(5, sizeStats.at("system_memory_bytes").to_number()); - BOOST_TEST_REQUIRE(sizeStats.contains("max_system_memory_bytes")); - BOOST_REQUIRE_EQUAL( - 6, sizeStats.at("max_system_memory_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_by_field_count")); BOOST_REQUIRE_EQUAL(7, sizeStats.at("total_by_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_partition_field_count")); diff --git a/lib/api/unittest/CModelSnapshotJsonWriterTest.cc b/lib/api/unittest/CModelSnapshotJsonWriterTest.cc index c3c32aa022..f2c22a41de 100644 --- a/lib/api/unittest/CModelSnapshotJsonWriterTest.cc +++ b/lib/api/unittest/CModelSnapshotJsonWriterTest.cc @@ -118,10 +118,6 @@ BOOST_AUTO_TEST_CASE(testWrite) { BOOST_TEST_REQUIRE(modelSizeStats.contains("peak_model_bytes")); BOOST_REQUIRE_EQUAL( 60000, modelSizeStats.at("peak_model_bytes").to_number()); - BOOST_REQUIRE_EQUAL( - 409600, modelSizeStats.at("system_memory_bytes").to_number()); - BOOST_REQUIRE_EQUAL( - 413696, modelSizeStats.at("max_system_memory_bytes").to_number()); BOOST_TEST_REQUIRE(modelSizeStats.contains("total_by_field_count")); BOOST_REQUIRE_EQUAL( 3, modelSizeStats.at("total_by_field_count").to_number()); diff --git a/lib/model/CMakeLists.txt b/lib/model/CMakeLists.txt index c53eec9fb0..37c8c1cc5c 100644 --- a/lib/model/CMakeLists.txt +++ b/lib/model/CMakeLists.txt @@ -75,6 +75,7 @@ ml_add_library(MlModel SHARED CSampleCounts.cc CSearchKey.cc CSimpleCountDetector.cc + CSystemMemoryUsage.cc CTokenListCategory.cc CTokenListDataCategorizerBase.cc CTokenListReverseSearchCreator.cc diff --git a/lib/model/CResourceMonitor.cc b/lib/model/CResourceMonitor.cc index 7d71df1afb..1e131ce0b2 100644 --- a/lib/model/CResourceMonitor.cc +++ b/lib/model/CResourceMonitor.cc @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -378,11 +379,12 @@ void CResourceMonitor::sendMemoryUsageReport(core_t::TTime bucketStartTime, CResourceMonitor::SModelSizeStats CResourceMonitor::createMemoryUsageReport(core_t::TTime bucketStartTime) { SModelSizeStats res; + CSystemMemoryUsage systemMemoryUsage; res.s_Usage = this->totalMemory(); - res.s_AdjustedUsage = this->adjustedUsage(res.s_Usage); + res.s_AdjustedUsage = systemMemoryUsage(this->adjustedUsage(res.s_Usage)); res.s_PeakUsage = static_cast( core::CProgramCounters::counter(counter_t::E_TSADPeakMemoryUsage)); - res.s_AdjustedPeakUsage = this->adjustedUsage(res.s_PeakUsage); + res.s_AdjustedPeakUsage = systemMemoryUsage(this->adjustedUsage(res.s_PeakUsage)); res.s_SystemMemoryUsage = core::CProcessStats::residentSetSize(); res.s_MaxSystemMemoryUsage = core::CProcessStats::maxResidentSetSize(); res.s_BytesMemoryLimit = this->persistenceMemoryIncreaseFactor() * m_ByteLimitHigh; diff --git a/lib/model/CSystemMemoryUsage.cc b/lib/model/CSystemMemoryUsage.cc new file mode 100644 index 0000000000..ae275beb34 --- /dev/null +++ b/lib/model/CSystemMemoryUsage.cc @@ -0,0 +1,23 @@ +/* +* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the following additional limitation. Functionality enabled by the + * files subject to the Elastic License 2.0 may only be used in production when + * invoked by an Elasticsearch process with a license key installed that permits + * use of machine learning features. You may not use this file except in + * compliance with the Elastic License 2.0 and the foregoing additional + * limitation. + */ + +#include + +#include + +namespace ml { +namespace model { +// On platforms other than Linux the system memory usage is that provided - the estimated size of the models. +std::size_t CSystemMemoryUsage::operator()(std::size_t memSize) { + return memSize; +} +} +} diff --git a/lib/model/CSystemMemoryUsage_Linux.cc b/lib/model/CSystemMemoryUsage_Linux.cc new file mode 100644 index 0000000000..a32026f2a5 --- /dev/null +++ b/lib/model/CSystemMemoryUsage_Linux.cc @@ -0,0 +1,24 @@ +/* +* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the following additional limitation. Functionality enabled by the + * files subject to the Elastic License 2.0 may only be used in production when + * invoked by an Elasticsearch process with a license key installed that permits + * use of machine learning features. You may not use this file except in + * compliance with the Elastic License 2.0 and the foregoing additional + * limitation. + */ + +#include + +#include + +namespace ml { +namespace model { +// On Linux the system memory usage is actually that determined by the OS. +// The estimated value provided is ignored. +std::size_t CSystemMemoryUsage::operator()(std::size_t /*memSize*/) { + return core::CProcessStats::maxResidentSetSize(); +} +} +} From efc311b5f432e3ed6183a5f7b74c28300973cbd5 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Wed, 4 Jun 2025 16:36:50 +1200 Subject: [PATCH 15/49] Fix copyright headers --- include/model/CSystemMemoryUsage.h | 3 ++- lib/model/CSystemMemoryUsage.cc | 2 +- lib/model/CSystemMemoryUsage_Linux.cc | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/include/model/CSystemMemoryUsage.h b/include/model/CSystemMemoryUsage.h index 62adea2f29..6e78091521 100644 --- a/include/model/CSystemMemoryUsage.h +++ b/include/model/CSystemMemoryUsage.h @@ -1,5 +1,5 @@ /* -* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one * or more contributor license agreements. Licensed under the Elastic License * 2.0 and the following additional limitation. Functionality enabled by the * files subject to the Elastic License 2.0 may only be used in production when @@ -8,6 +8,7 @@ * compliance with the Elastic License 2.0 and the foregoing additional * limitation. */ + #ifndef INCLUDED_ml_model_CSystemMemoryUsage_h #define INCLUDED_ml_model_CSystemMemoryUsage_h diff --git a/lib/model/CSystemMemoryUsage.cc b/lib/model/CSystemMemoryUsage.cc index ae275beb34..8fd926f65b 100644 --- a/lib/model/CSystemMemoryUsage.cc +++ b/lib/model/CSystemMemoryUsage.cc @@ -1,5 +1,5 @@ /* -* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one * or more contributor license agreements. Licensed under the Elastic License * 2.0 and the following additional limitation. Functionality enabled by the * files subject to the Elastic License 2.0 may only be used in production when diff --git a/lib/model/CSystemMemoryUsage_Linux.cc b/lib/model/CSystemMemoryUsage_Linux.cc index a32026f2a5..f3b21522d5 100644 --- a/lib/model/CSystemMemoryUsage_Linux.cc +++ b/lib/model/CSystemMemoryUsage_Linux.cc @@ -1,5 +1,5 @@ /* -* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one * or more contributor license agreements. Licensed under the Elastic License * 2.0 and the following additional limitation. Functionality enabled by the * files subject to the Elastic License 2.0 may only be used in production when From 6be6395223ebe9ac0434c9cec57d6adada30a2a9 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Wed, 4 Jun 2025 16:50:11 +1200 Subject: [PATCH 16/49] Nits in test code --- lib/api/unittest/CAnomalyJobLimitTest.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/api/unittest/CAnomalyJobLimitTest.cc b/lib/api/unittest/CAnomalyJobLimitTest.cc index cc85baaf99..20e422134f 100644 --- a/lib/api/unittest/CAnomalyJobLimitTest.cc +++ b/lib/api/unittest/CAnomalyJobLimitTest.cc @@ -128,7 +128,7 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { BOOST_REQUIRE_EQUAL(uint64_t(18630), job.numRecordsHandled()); nonLimitedUsage = limits.resourceMonitor().totalMemory(); - nonLimitedMaxSystemUsage = limits.resourceMonitor().maxSystemMemory(); + nonLimitedMaxSystemUsage = model::CResourceMonitor::maxSystemMemory(); } } LOG_DEBUG(<< "nonLimitedUsage: " << nonLimitedUsage); @@ -171,7 +171,7 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { // TODO this limit must be tightened once there is more granular // control over the model memory creation limitedUsage = limits.resourceMonitor().totalMemory(); - limitedMaxSystemUsage = limits.resourceMonitor().maxSystemMemory(); + limitedMaxSystemUsage = model::CResourceMonitor::maxSystemMemory(); } LOG_TRACE(<< outputStrm.str()); From 134a4940771904e1eeb1922dd7f8835852189dac Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Mon, 9 Jun 2025 16:06:26 +1200 Subject: [PATCH 17/49] Attend to code review comments On Linux, use systemMemoryUsage for the value of totalMem. Do not "adjust" this value as is done for the estimated usage, as it is unnecessary. --- include/model/CSystemMemoryUsage.h | 12 ++++++++-- lib/model/CResourceMonitor.cc | 32 ++++++++++++++++----------- lib/model/CSystemMemoryUsage.cc | 9 +++++--- lib/model/CSystemMemoryUsage_Linux.cc | 7 +++++- 4 files changed, 41 insertions(+), 19 deletions(-) diff --git a/include/model/CSystemMemoryUsage.h b/include/model/CSystemMemoryUsage.h index 6e78091521..066fc2e152 100644 --- a/include/model/CSystemMemoryUsage.h +++ b/include/model/CSystemMemoryUsage.h @@ -14,7 +14,7 @@ #include -#include +#include namespace ml { namespace model { @@ -24,6 +24,8 @@ namespace model { //! DESCRIPTION:\n //! Determines the memory used by the current process based on the operating system. class MODEL_EXPORT CSystemMemoryUsage { +public: + using TMemoryAdjuster = std::function; public: CSystemMemoryUsage() = default; ~CSystemMemoryUsage() = default; @@ -36,7 +38,13 @@ class MODEL_EXPORT CSystemMemoryUsage { //! Return the system memory used by the current process. //! \param memSize An estimate of the process memory //! \return The system memory usage based on the current OS. - std::size_t operator()(std::size_t memSize); + std::size_t operator()(std::size_t memSize) const; + + //! Potentially adjust the provided memory usage value, depending on the current platform. + //! \param usage The memory usage of the current process. + //! \param memAdjuster Function to adjust the memory usage, if needed. + //! \return The (potentially adjusted) system memory usage. + static std::size_t maybeAdjustUsage(std::size_t usage, const TMemoryAdjuster& memAdjuster); }; } } diff --git a/lib/model/CResourceMonitor.cc b/lib/model/CResourceMonitor.cc index 1e131ce0b2..e9b365ab97 100644 --- a/lib/model/CResourceMonitor.cc +++ b/lib/model/CResourceMonitor.cc @@ -379,12 +379,11 @@ void CResourceMonitor::sendMemoryUsageReport(core_t::TTime bucketStartTime, CResourceMonitor::SModelSizeStats CResourceMonitor::createMemoryUsageReport(core_t::TTime bucketStartTime) { SModelSizeStats res; - CSystemMemoryUsage systemMemoryUsage; res.s_Usage = this->totalMemory(); - res.s_AdjustedUsage = systemMemoryUsage(this->adjustedUsage(res.s_Usage)); + res.s_AdjustedUsage = this->adjustedUsage(res.s_Usage); res.s_PeakUsage = static_cast( core::CProgramCounters::counter(counter_t::E_TSADPeakMemoryUsage)); - res.s_AdjustedPeakUsage = systemMemoryUsage(this->adjustedUsage(res.s_PeakUsage)); + res.s_AdjustedPeakUsage = this->adjustedUsage(res.s_PeakUsage); res.s_SystemMemoryUsage = core::CProcessStats::residentSetSize(); res.s_MaxSystemMemoryUsage = core::CProcessStats::maxResidentSetSize(); res.s_BytesMemoryLimit = this->persistenceMemoryIncreaseFactor() * m_ByteLimitHigh; @@ -406,16 +405,22 @@ CResourceMonitor::createMemoryUsageReport(core_t::TTime bucketStartTime) { } std::size_t CResourceMonitor::adjustedUsage(std::size_t usage) const { - // We scale the reported memory usage by the inverse of the byte limit margin. - // This gives the user a fairer indication of how close the job is to hitting - // the model memory limit in a concise manner (as the limit is scaled down by - // the margin during the beginning period of the job's existence). - std::size_t adjustedUsage{ - static_cast(static_cast(usage) / m_ByteLimitMargin)}; + const std::size_t persistenceMemoryIncreaseFactor = this->persistenceMemoryIncreaseFactor(); + + return CSystemMemoryUsage::maybeAdjustUsage( + usage, [&byteLimitMargin=m_ByteLimitMargin, &persistenceMemoryIncreaseFactor](std::size_t usage_) { + // On platforms that estimate the memory usage, it is scaled by the inverse of the byte limit margin. + // This gives the user a fairer indication of how close the job is to hitting + // the model memory limit in a concise manner (as the limit is scaled down by + // the margin during the beginning period of the job's existence). + std::size_t adjustedUsage{ + static_cast(static_cast(usage_) / byteLimitMargin) + }; - adjustedUsage *= this->persistenceMemoryIncreaseFactor(); + adjustedUsage *= persistenceMemoryIncreaseFactor; - return adjustedUsage; + return adjustedUsage; + }); } std::size_t CResourceMonitor::persistenceMemoryIncreaseFactor() const { @@ -491,9 +496,10 @@ std::size_t CResourceMonitor::lowLimit() const { } std::size_t CResourceMonitor::totalMemory() const { - return m_MonitoredResourceCurrentMemory + m_ExtraMemory + + CSystemMemoryUsage systemMemoryUsage; + return systemMemoryUsage(m_MonitoredResourceCurrentMemory + m_ExtraMemory + static_cast(core::CProgramCounters::counter( - counter_t::E_TSADOutputMemoryAllocatorUsage)); + counter_t::E_TSADOutputMemoryAllocatorUsage))); } std::size_t CResourceMonitor::systemMemory() { diff --git a/lib/model/CSystemMemoryUsage.cc b/lib/model/CSystemMemoryUsage.cc index 8fd926f65b..b79d9a0a4c 100644 --- a/lib/model/CSystemMemoryUsage.cc +++ b/lib/model/CSystemMemoryUsage.cc @@ -11,13 +11,16 @@ #include -#include - namespace ml { namespace model { // On platforms other than Linux the system memory usage is that provided - the estimated size of the models. -std::size_t CSystemMemoryUsage::operator()(std::size_t memSize) { +std::size_t CSystemMemoryUsage::operator()(std::size_t memSize) const { return memSize; } + +// On platforms other than Linux the system memory is estimated. It must be adjusted before being reported +std::size_t CSystemMemoryUsage::maybeAdjustUsage(std::size_t usage, const TMemoryAdjuster& memAdjuster) { + return memAdjuster(usage); +} } } diff --git a/lib/model/CSystemMemoryUsage_Linux.cc b/lib/model/CSystemMemoryUsage_Linux.cc index f3b21522d5..0b25eef8e1 100644 --- a/lib/model/CSystemMemoryUsage_Linux.cc +++ b/lib/model/CSystemMemoryUsage_Linux.cc @@ -17,8 +17,13 @@ namespace ml { namespace model { // On Linux the system memory usage is actually that determined by the OS. // The estimated value provided is ignored. -std::size_t CSystemMemoryUsage::operator()(std::size_t /*memSize*/) { +std::size_t CSystemMemoryUsage::operator()(std::size_t /*memSize*/) const { return core::CProcessStats::maxResidentSetSize(); } + +std::size_t CSystemMemoryUsage::maybeAdjustUsage(std::size_t usage, const TMemoryAdjuster& /*memAdjuster*/) { + // On Linux we use the actual system memory usage, rather tha an estimated value, so there is no need to adjust it. + return usage; +} } } From ed426aca39cfb491c366142c2a3fe3de68aa0fcc Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Mon, 9 Jun 2025 16:15:10 +1200 Subject: [PATCH 18/49] Formatting --- include/model/CSystemMemoryUsage.h | 1 + lib/model/CResourceMonitor.cc | 31 ++++++++++++++------------- lib/model/CSystemMemoryUsage.cc | 3 ++- lib/model/CSystemMemoryUsage_Linux.cc | 3 ++- 4 files changed, 21 insertions(+), 17 deletions(-) diff --git a/include/model/CSystemMemoryUsage.h b/include/model/CSystemMemoryUsage.h index 066fc2e152..3d0b83bf40 100644 --- a/include/model/CSystemMemoryUsage.h +++ b/include/model/CSystemMemoryUsage.h @@ -26,6 +26,7 @@ namespace model { class MODEL_EXPORT CSystemMemoryUsage { public: using TMemoryAdjuster = std::function; + public: CSystemMemoryUsage() = default; ~CSystemMemoryUsage() = default; diff --git a/lib/model/CResourceMonitor.cc b/lib/model/CResourceMonitor.cc index e9b365ab97..fff68a0014 100644 --- a/lib/model/CResourceMonitor.cc +++ b/lib/model/CResourceMonitor.cc @@ -405,22 +405,23 @@ CResourceMonitor::createMemoryUsageReport(core_t::TTime bucketStartTime) { } std::size_t CResourceMonitor::adjustedUsage(std::size_t usage) const { - const std::size_t persistenceMemoryIncreaseFactor = this->persistenceMemoryIncreaseFactor(); + const std::size_t persistenceMemoryIncreaseFactor = + this->persistenceMemoryIncreaseFactor(); - return CSystemMemoryUsage::maybeAdjustUsage( - usage, [&byteLimitMargin=m_ByteLimitMargin, &persistenceMemoryIncreaseFactor](std::size_t usage_) { - // On platforms that estimate the memory usage, it is scaled by the inverse of the byte limit margin. - // This gives the user a fairer indication of how close the job is to hitting - // the model memory limit in a concise manner (as the limit is scaled down by - // the margin during the beginning period of the job's existence). - std::size_t adjustedUsage{ - static_cast(static_cast(usage_) / byteLimitMargin) - }; + return CSystemMemoryUsage::maybeAdjustUsage(usage, [ + &byteLimitMargin = m_ByteLimitMargin, &persistenceMemoryIncreaseFactor + ](std::size_t usage_) { + // On platforms that estimate the memory usage, it is scaled by the inverse of the byte limit margin. + // This gives the user a fairer indication of how close the job is to hitting + // the model memory limit in a concise manner (as the limit is scaled down by + // the margin during the beginning period of the job's existence). + std::size_t adjustedUsage{ + static_cast(static_cast(usage_) / byteLimitMargin)}; - adjustedUsage *= persistenceMemoryIncreaseFactor; + adjustedUsage *= persistenceMemoryIncreaseFactor; - return adjustedUsage; - }); + return adjustedUsage; + }); } std::size_t CResourceMonitor::persistenceMemoryIncreaseFactor() const { @@ -498,8 +499,8 @@ std::size_t CResourceMonitor::lowLimit() const { std::size_t CResourceMonitor::totalMemory() const { CSystemMemoryUsage systemMemoryUsage; return systemMemoryUsage(m_MonitoredResourceCurrentMemory + m_ExtraMemory + - static_cast(core::CProgramCounters::counter( - counter_t::E_TSADOutputMemoryAllocatorUsage))); + static_cast(core::CProgramCounters::counter( + counter_t::E_TSADOutputMemoryAllocatorUsage))); } std::size_t CResourceMonitor::systemMemory() { diff --git a/lib/model/CSystemMemoryUsage.cc b/lib/model/CSystemMemoryUsage.cc index b79d9a0a4c..ee86e22f44 100644 --- a/lib/model/CSystemMemoryUsage.cc +++ b/lib/model/CSystemMemoryUsage.cc @@ -19,7 +19,8 @@ std::size_t CSystemMemoryUsage::operator()(std::size_t memSize) const { } // On platforms other than Linux the system memory is estimated. It must be adjusted before being reported -std::size_t CSystemMemoryUsage::maybeAdjustUsage(std::size_t usage, const TMemoryAdjuster& memAdjuster) { +std::size_t CSystemMemoryUsage::maybeAdjustUsage(std::size_t usage, + const TMemoryAdjuster& memAdjuster) { return memAdjuster(usage); } } diff --git a/lib/model/CSystemMemoryUsage_Linux.cc b/lib/model/CSystemMemoryUsage_Linux.cc index 0b25eef8e1..76a3ca87a8 100644 --- a/lib/model/CSystemMemoryUsage_Linux.cc +++ b/lib/model/CSystemMemoryUsage_Linux.cc @@ -21,7 +21,8 @@ std::size_t CSystemMemoryUsage::operator()(std::size_t /*memSize*/) const { return core::CProcessStats::maxResidentSetSize(); } -std::size_t CSystemMemoryUsage::maybeAdjustUsage(std::size_t usage, const TMemoryAdjuster& /*memAdjuster*/) { +std::size_t CSystemMemoryUsage::maybeAdjustUsage(std::size_t usage, + const TMemoryAdjuster& /*memAdjuster*/) { // On Linux we use the actual system memory usage, rather tha an estimated value, so there is no need to adjust it. return usage; } From 4c8bf8bd63a675fc2cb9c09b8d651daad89e44c0 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Wed, 11 Jun 2025 17:11:41 +1200 Subject: [PATCH 19/49] Attend to code review comments * Roll back a previous change that returned system memory from CResourceManager::totalMemory * Remove unneeded code * Reworked platform dependent code to make the intent mor explicit --- include/model/CProcessMemoryUsage.h | 48 +++++++++++++++ include/model/CResourceMonitor.h | 5 +- include/model/CSystemMemoryUsage.h | 53 ----------------- lib/api/unittest/CAnomalyJobLimitTest.cc | 18 ------ lib/api/unittest/CJsonOutputWriterTest.cc | 54 ++++++++--------- .../unittest/CModelSnapshotJsonWriterTest.cc | 2 - lib/core/CProcessStats_Linux.cc | 1 - lib/core/CProcessStats_MacOSX.cc | 1 - lib/core/CProcessStats_Windows.cc | 1 - lib/core/unittest/CLoggerTest.cc | 2 +- lib/core/unittest/CNamedPipeFactoryTest.cc | 59 ++++++++++--------- lib/model/CMakeLists.txt | 2 +- ...mMemoryUsage.cc => CProcessMemoryUsage.cc} | 14 ++--- lib/model/CProcessMemoryUsage_Linux.cc | 21 +++++++ lib/model/CResourceMonitor.cc | 56 ++++++++++-------- lib/model/CSystemMemoryUsage_Linux.cc | 30 ---------- lib/model/unittest/CResourceMonitorTest.cc | 16 ----- 17 files changed, 165 insertions(+), 218 deletions(-) create mode 100644 include/model/CProcessMemoryUsage.h delete mode 100644 include/model/CSystemMemoryUsage.h rename lib/model/{CSystemMemoryUsage.cc => CProcessMemoryUsage.cc} (52%) create mode 100644 lib/model/CProcessMemoryUsage_Linux.cc delete mode 100644 lib/model/CSystemMemoryUsage_Linux.cc diff --git a/include/model/CProcessMemoryUsage.h b/include/model/CProcessMemoryUsage.h new file mode 100644 index 0000000000..6a449ebf84 --- /dev/null +++ b/include/model/CProcessMemoryUsage.h @@ -0,0 +1,48 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the following additional limitation. Functionality enabled by the + * files subject to the Elastic License 2.0 may only be used in production when + * invoked by an Elasticsearch process with a license key installed that permits + * use of machine learning features. You may not use this file except in + * compliance with the Elastic License 2.0 and the foregoing additional + * limitation. + */ + +#ifndef INCLUDED_ml_model_CSystemMemoryUsage_h +#define INCLUDED_ml_model_CSystemMemoryUsage_h + +#include + +#include + +namespace ml { +namespace model { + +//! \brief Determines how to calculate the memory used by the current process. +//! +//! DESCRIPTION:\n +//! Determines how to calculate the memory used by the current process based on the operating system. +//! On some OS's (Mac, Windows) we use the estimated memory usage of the models, +//! while on others (Linux) we use the actual memory of the process as provided by system calls. +class MODEL_EXPORT CProcessMemoryUsage { +public: + enum class EMemoryStrategy { E_Estimated, E_System }; + +public: + CProcessMemoryUsage() = default; + ~CProcessMemoryUsage() = default; + + CProcessMemoryUsage(const CProcessMemoryUsage&) = delete; + CProcessMemoryUsage(CProcessMemoryUsage&&) = delete; + CProcessMemoryUsage& operator=(const CProcessMemoryUsage&) = delete; + CProcessMemoryUsage& operator=(CProcessMemoryUsage&&) = delete; + + //! Return the strategy used to calculate the memory used by the current process. + //! \return Strategy used to calculate the process memory based on the current OS. + static EMemoryStrategy memoryStrategy(); +}; +} +} + +#endif //INCLUDED_ml_model_CSystemMemoryUsage_h diff --git a/include/model/CResourceMonitor.h b/include/model/CResourceMonitor.h index 68758d4f09..411c3671e9 100644 --- a/include/model/CResourceMonitor.h +++ b/include/model/CResourceMonitor.h @@ -54,8 +54,6 @@ class MODEL_EXPORT CResourceMonitor { std::size_t s_AdjustedUsage{0}; std::size_t s_PeakUsage{0}; std::size_t s_AdjustedPeakUsage{0}; - std::size_t s_SystemMemoryUsage{0}; - std::size_t s_MaxSystemMemoryUsage{0}; std::size_t s_ByFields{0}; std::size_t s_PartitionFields{0}; std::size_t s_OverFields{0}; @@ -237,6 +235,9 @@ class MODEL_EXPORT CResourceMonitor { //! Returns the amount by which reported memory usage is scaled depending on the type of persistence in use std::size_t persistenceMemoryIncreaseFactor() const; + //! Modify the supplied usage value depending on a platform dependent strategy. + std::size_t applyMemoryStrategy(std::size_t usage) const; + private: //! The registered collection of components TMonitoredResourcePtrSizeUMap m_Resources; diff --git a/include/model/CSystemMemoryUsage.h b/include/model/CSystemMemoryUsage.h deleted file mode 100644 index 3d0b83bf40..0000000000 --- a/include/model/CSystemMemoryUsage.h +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0 and the following additional limitation. Functionality enabled by the - * files subject to the Elastic License 2.0 may only be used in production when - * invoked by an Elasticsearch process with a license key installed that permits - * use of machine learning features. You may not use this file except in - * compliance with the Elastic License 2.0 and the foregoing additional - * limitation. - */ - -#ifndef INCLUDED_ml_model_CSystemMemoryUsage_h -#define INCLUDED_ml_model_CSystemMemoryUsage_h - -#include - -#include - -namespace ml { -namespace model { - -//! \brief Determines the memory used by the current process. -//! -//! DESCRIPTION:\n -//! Determines the memory used by the current process based on the operating system. -class MODEL_EXPORT CSystemMemoryUsage { -public: - using TMemoryAdjuster = std::function; - -public: - CSystemMemoryUsage() = default; - ~CSystemMemoryUsage() = default; - - CSystemMemoryUsage(const CSystemMemoryUsage&) = delete; - CSystemMemoryUsage(CSystemMemoryUsage&&) = delete; - CSystemMemoryUsage& operator=(const CSystemMemoryUsage&) = delete; - CSystemMemoryUsage& operator=(CSystemMemoryUsage&&) = delete; - - //! Return the system memory used by the current process. - //! \param memSize An estimate of the process memory - //! \return The system memory usage based on the current OS. - std::size_t operator()(std::size_t memSize) const; - - //! Potentially adjust the provided memory usage value, depending on the current platform. - //! \param usage The memory usage of the current process. - //! \param memAdjuster Function to adjust the memory usage, if needed. - //! \return The (potentially adjusted) system memory usage. - static std::size_t maybeAdjustUsage(std::size_t usage, const TMemoryAdjuster& memAdjuster); -}; -} -} - -#endif //INCLUDED_ml_model_CSystemMemoryUsage_h diff --git a/lib/api/unittest/CAnomalyJobLimitTest.cc b/lib/api/unittest/CAnomalyJobLimitTest.cc index 20e422134f..e4cffde084 100644 --- a/lib/api/unittest/CAnomalyJobLimitTest.cc +++ b/lib/api/unittest/CAnomalyJobLimitTest.cc @@ -93,8 +93,6 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { std::size_t nonLimitedUsage{0}; std::size_t limitedUsage{0}; - std::size_t nonLimitedMaxSystemUsage{0}; - std::size_t limitedMaxSystemUsage{0}; { // Without limits, this data set should make the models around // 1230000 bytes @@ -128,12 +126,9 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { BOOST_REQUIRE_EQUAL(uint64_t(18630), job.numRecordsHandled()); nonLimitedUsage = limits.resourceMonitor().totalMemory(); - nonLimitedMaxSystemUsage = model::CResourceMonitor::maxSystemMemory(); } } LOG_DEBUG(<< "nonLimitedUsage: " << nonLimitedUsage); - LOG_DEBUG(<< "nonLimitedMaxSystemUsage: " << nonLimitedMaxSystemUsage); - BOOST_TEST_REQUIRE(nonLimitedMaxSystemUsage >= nonLimitedUsage); { // Now run the data with limiting ml::api::CAnomalyJobConfig jobConfig = CTestAnomalyJob::makeSimpleJobConfig( @@ -171,15 +166,11 @@ BOOST_AUTO_TEST_CASE(testAccuracy) { // TODO this limit must be tightened once there is more granular // control over the model memory creation limitedUsage = limits.resourceMonitor().totalMemory(); - limitedMaxSystemUsage = model::CResourceMonitor::maxSystemMemory(); } LOG_TRACE(<< outputStrm.str()); LOG_DEBUG(<< "Non-limited usage: " << nonLimitedUsage << "; limited: " << limitedUsage); - LOG_DEBUG(<< "Non-limited System Usage: " << nonLimitedMaxSystemUsage); - LOG_DEBUG(<< "Limited System Usage: " << limitedMaxSystemUsage); BOOST_TEST_REQUIRE(limitedUsage < nonLimitedUsage); - BOOST_TEST_REQUIRE(limitedMaxSystemUsage >= limitedUsage); } } @@ -384,8 +375,6 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { LOG_DEBUG(<< "# partition = " << used.s_PartitionFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage bytes = " << used.s_Usage); - LOG_DEBUG(<< "System memory usage bytes = " << used.s_SystemMemoryUsage); - LOG_DEBUG(<< "Max system memory usage bytes = " << used.s_MaxSystemMemoryUsage); LOG_DEBUG(<< "Memory limit bytes = " << memoryLimit * core::constants::BYTES_IN_MEGABYTES); BOOST_TEST_REQUIRE(used.s_ByFields > testParam.s_ExpectedByFields); @@ -395,7 +384,6 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { memoryLimit * core::constants::BYTES_IN_MEGABYTES / 2, used.s_Usage, memoryLimit * core::constants::BYTES_IN_MEGABYTES / testParam.s_ExpectedByMemoryUsageRelativeErrorDivisor); - BOOST_TEST_REQUIRE(used.s_Usage <= used.s_MaxSystemMemoryUsage); } LOG_DEBUG(<< "**** Test partition with bucketLength = " << testParam.s_BucketLength @@ -440,8 +428,6 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { LOG_DEBUG(<< "# partition = " << used.s_PartitionFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage = " << used.s_Usage); - LOG_DEBUG(<< "System memory usage = " << used.s_SystemMemoryUsage); - LOG_DEBUG(<< "Max system memory usage = " << used.s_MaxSystemMemoryUsage); LOG_DEBUG(<< "Memory limit bytes = " << memoryLimit * 1024 * 1024); BOOST_TEST_REQUIRE(used.s_PartitionFields >= testParam.s_ExpectedPartitionFields); BOOST_TEST_REQUIRE(used.s_PartitionFields < 450); @@ -451,7 +437,6 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { memoryLimit * core::constants::BYTES_IN_MEGABYTES / 2, used.s_Usage, memoryLimit * core::constants::BYTES_IN_MEGABYTES / testParam.s_ExpectedPartitionUsageRelativeErrorDivisor); - BOOST_TEST_REQUIRE(used.s_Usage <= used.s_MaxSystemMemoryUsage); } LOG_DEBUG(<< "**** Test over with bucketLength = " << testParam.s_BucketLength @@ -494,8 +479,6 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { LOG_DEBUG(<< "# over = " << used.s_OverFields); LOG_DEBUG(<< "Memory status = " << used.s_MemoryStatus); LOG_DEBUG(<< "Memory usage = " << used.s_Usage); - LOG_DEBUG(<< "System memory usage = " << used.s_SystemMemoryUsage); - LOG_DEBUG(<< "Max system memory usage = " << used.s_MaxSystemMemoryUsage); LOG_DEBUG(<< "Memory limit bytes = " << memoryLimit * 1024 * 1024); BOOST_TEST_REQUIRE(used.s_OverFields > testParam.s_ExpectedOverFields); BOOST_TEST_REQUIRE(used.s_OverFields <= 9000); @@ -503,7 +486,6 @@ BOOST_AUTO_TEST_CASE(testModelledEntityCountForFixedMemoryLimit) { memoryLimit * core::constants::BYTES_IN_MEGABYTES / 2, used.s_Usage, memoryLimit * core::constants::BYTES_IN_MEGABYTES / testParam.s_ExpectedOverUsageRelativeErrorDivisor); - BOOST_TEST_REQUIRE(used.s_Usage <= used.s_MaxSystemMemoryUsage); } } } diff --git a/lib/api/unittest/CJsonOutputWriterTest.cc b/lib/api/unittest/CJsonOutputWriterTest.cc index 19caa4dfc1..16772158f8 100644 --- a/lib/api/unittest/CJsonOutputWriterTest.cc +++ b/lib/api/unittest/CJsonOutputWriterTest.cc @@ -1728,23 +1728,21 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { resourceUsage.s_AdjustedUsage = 2; resourceUsage.s_PeakUsage = 3; resourceUsage.s_AdjustedPeakUsage = 4; - resourceUsage.s_SystemMemoryUsage = 5; - resourceUsage.s_MaxSystemMemoryUsage = 6; - resourceUsage.s_ByFields = 7; - resourceUsage.s_PartitionFields = 8; - resourceUsage.s_OverFields = 9; - resourceUsage.s_AllocationFailures = 10; + resourceUsage.s_ByFields = 5; + resourceUsage.s_PartitionFields = 6; + resourceUsage.s_OverFields = 7; + resourceUsage.s_AllocationFailures = 8; resourceUsage.s_MemoryStatus = ml::model_t::E_MemoryStatusHardLimit; resourceUsage.s_AssignmentMemoryBasis = ml::model_t::E_AssignmentBasisPeakModelBytes; - resourceUsage.s_BucketStartTime = 11; - resourceUsage.s_BytesExceeded = 12; - resourceUsage.s_BytesMemoryLimit = 13; - resourceUsage.s_OverallCategorizerStats.s_CategorizedMessages = 14; - resourceUsage.s_OverallCategorizerStats.s_TotalCategories = 15; - resourceUsage.s_OverallCategorizerStats.s_FrequentCategories = 16; - resourceUsage.s_OverallCategorizerStats.s_RareCategories = 17; - resourceUsage.s_OverallCategorizerStats.s_DeadCategories = 18; - resourceUsage.s_OverallCategorizerStats.s_MemoryCategorizationFailures = 19; + resourceUsage.s_BucketStartTime = 9; + resourceUsage.s_BytesExceeded = 10; + resourceUsage.s_BytesMemoryLimit = 11; + resourceUsage.s_OverallCategorizerStats.s_CategorizedMessages = 12; + resourceUsage.s_OverallCategorizerStats.s_TotalCategories = 13; + resourceUsage.s_OverallCategorizerStats.s_FrequentCategories = 14; + resourceUsage.s_OverallCategorizerStats.s_RareCategories = 15; + resourceUsage.s_OverallCategorizerStats.s_DeadCategories = 16; + resourceUsage.s_OverallCategorizerStats.s_MemoryCategorizationFailures = 17; resourceUsage.s_OverallCategorizerStats.s_CategorizationStatus = ml::model_t::E_CategorizationStatusWarn; @@ -1773,17 +1771,17 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { BOOST_TEST_REQUIRE(sizeStats.contains("peak_model_bytes")); BOOST_REQUIRE_EQUAL(4, sizeStats.at("peak_model_bytes").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_by_field_count")); - BOOST_REQUIRE_EQUAL(7, sizeStats.at("total_by_field_count").to_number()); + BOOST_REQUIRE_EQUAL(5, sizeStats.at("total_by_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_partition_field_count")); BOOST_REQUIRE_EQUAL( - 8, sizeStats.at("total_partition_field_count").to_number()); + 6, sizeStats.at("total_partition_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_over_field_count")); - BOOST_REQUIRE_EQUAL(9, sizeStats.at("total_over_field_count").to_number()); + BOOST_REQUIRE_EQUAL(7, sizeStats.at("total_over_field_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("bucket_allocation_failures_count")); BOOST_REQUIRE_EQUAL( - 10, sizeStats.at("bucket_allocation_failures_count").to_number()); + 8, sizeStats.at("bucket_allocation_failures_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("timestamp")); - BOOST_REQUIRE_EQUAL(11000, sizeStats.at("timestamp").to_number()); + BOOST_REQUIRE_EQUAL(9000, sizeStats.at("timestamp").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("memory_status")); BOOST_REQUIRE_EQUAL("hard_limit", sizeStats.at("memory_status").as_string()); BOOST_TEST_REQUIRE(sizeStats.contains("assignment_memory_basis")); @@ -1793,23 +1791,23 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) { std::int64_t nowMs{ml::core::CTimeUtils::nowMs()}; BOOST_TEST_REQUIRE(nowMs >= sizeStats.at("log_time").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("model_bytes_exceeded")); - BOOST_REQUIRE_EQUAL(12, sizeStats.at("model_bytes_exceeded").to_number()); + BOOST_REQUIRE_EQUAL(10, sizeStats.at("model_bytes_exceeded").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("model_bytes_memory_limit")); BOOST_REQUIRE_EQUAL( - 13, sizeStats.at("model_bytes_memory_limit").to_number()); + 11, sizeStats.at("model_bytes_memory_limit").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("categorized_doc_count")); - BOOST_REQUIRE_EQUAL(14, sizeStats.at("categorized_doc_count").to_number()); + BOOST_REQUIRE_EQUAL(12, sizeStats.at("categorized_doc_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("total_category_count")); - BOOST_REQUIRE_EQUAL(15, sizeStats.at("total_category_count").to_number()); + BOOST_REQUIRE_EQUAL(13, sizeStats.at("total_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("frequent_category_count")); BOOST_REQUIRE_EQUAL( - 16, sizeStats.at("frequent_category_count").to_number()); + 14, sizeStats.at("frequent_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("rare_category_count")); - BOOST_REQUIRE_EQUAL(17, sizeStats.at("rare_category_count").to_number()); + BOOST_REQUIRE_EQUAL(15, sizeStats.at("rare_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("dead_category_count")); - BOOST_REQUIRE_EQUAL(18, sizeStats.at("dead_category_count").to_number()); + BOOST_REQUIRE_EQUAL(16, sizeStats.at("dead_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("failed_category_count")); - BOOST_REQUIRE_EQUAL(19, sizeStats.at("failed_category_count").to_number()); + BOOST_REQUIRE_EQUAL(17, sizeStats.at("failed_category_count").to_number()); BOOST_TEST_REQUIRE(sizeStats.contains("categorization_status")); BOOST_REQUIRE_EQUAL("warn", sizeStats.at("categorization_status").as_string()); } diff --git a/lib/api/unittest/CModelSnapshotJsonWriterTest.cc b/lib/api/unittest/CModelSnapshotJsonWriterTest.cc index f2c22a41de..ff404b7f91 100644 --- a/lib/api/unittest/CModelSnapshotJsonWriterTest.cc +++ b/lib/api/unittest/CModelSnapshotJsonWriterTest.cc @@ -36,8 +36,6 @@ BOOST_AUTO_TEST_CASE(testWrite) { 20000, // bytes used (adjusted) 30000, // peak bytes used 60000, // peak bytes used (adjusted) - 409600, // System memory used (rss) - 413696, // Max system memory used (max rss) 3, // # by fields 1, // # partition fields 150, // # over fields diff --git a/lib/core/CProcessStats_Linux.cc b/lib/core/CProcessStats_Linux.cc index e3635e279f..e5ab8cdfd0 100644 --- a/lib/core/CProcessStats_Linux.cc +++ b/lib/core/CProcessStats_Linux.cc @@ -11,7 +11,6 @@ #include #include -#include #include #include diff --git a/lib/core/CProcessStats_MacOSX.cc b/lib/core/CProcessStats_MacOSX.cc index 61dc9fca66..2f1d02c2e2 100644 --- a/lib/core/CProcessStats_MacOSX.cc +++ b/lib/core/CProcessStats_MacOSX.cc @@ -11,7 +11,6 @@ #include #include -#include #include #include diff --git a/lib/core/CProcessStats_Windows.cc b/lib/core/CProcessStats_Windows.cc index 933952e1f2..7c612270b3 100644 --- a/lib/core/CProcessStats_Windows.cc +++ b/lib/core/CProcessStats_Windows.cc @@ -11,7 +11,6 @@ #include #include -#include #include #include diff --git a/lib/core/unittest/CLoggerTest.cc b/lib/core/unittest/CLoggerTest.cc index 1abec95da0..8667aa1430 100644 --- a/lib/core/unittest/CLoggerTest.cc +++ b/lib/core/unittest/CLoggerTest.cc @@ -62,7 +62,7 @@ std::function makeReader(std::ostringstream& loggedData) { return; } } - BOOST_FAIL("Failed to connect to logging pipe within a reasonable time"); + BOOST_TEST_CHECK(false, "Failed to connect to logging pipe within a reasonable time"); }; } diff --git a/lib/core/unittest/CNamedPipeFactoryTest.cc b/lib/core/unittest/CNamedPipeFactoryTest.cc index 39aef5e07e..446c978558 100644 --- a/lib/core/unittest/CNamedPipeFactoryTest.cc +++ b/lib/core/unittest/CNamedPipeFactoryTest.cc @@ -158,35 +158,36 @@ BOOST_AUTO_TEST_CASE(testServerIsCppWriter) { BOOST_REQUIRE_EQUAL(std::string(TEST_SIZE, TEST_CHAR), threadReader.data()); } -BOOST_AUTO_TEST_CASE(testServerIsCWriter) { - ml::test::CThreadDataReader threadReader{PAUSE_TIME_MS, MAX_ATTEMPTS, TEST_PIPE_NAME}; - BOOST_TEST_REQUIRE(threadReader.start()); - - std::atomic_bool dummy{false}; - ml::core::CNamedPipeFactory::TFileP file{ - ml::core::CNamedPipeFactory::openPipeFileWrite(TEST_PIPE_NAME, dummy)}; - BOOST_TEST_REQUIRE(file); - - std::size_t charsLeft{TEST_SIZE}; - std::size_t blockSize{7}; - while (charsLeft > 0) { - if (blockSize > charsLeft) { - blockSize = charsLeft; - } - BOOST_TEST_REQUIRE(std::fputs(std::string(blockSize, TEST_CHAR).c_str(), - file.get()) >= 0); - charsLeft -= blockSize; - } - - file.reset(); - - BOOST_TEST_REQUIRE(threadReader.waitForFinish()); - BOOST_TEST_REQUIRE(threadReader.attemptsTaken() <= MAX_ATTEMPTS); - BOOST_TEST_REQUIRE(threadReader.streamWentBad() == false); - - BOOST_REQUIRE_EQUAL(TEST_SIZE, threadReader.data().length()); - BOOST_REQUIRE_EQUAL(std::string(TEST_SIZE, TEST_CHAR), threadReader.data()); -} +// BOOST_AUTO_TEST_CASE(testServerIsCWriter) { +// ml::test::CThreadDataReader threadReader{PAUSE_TIME_MS, MAX_ATTEMPTS, TEST_PIPE_NAME}; +// BOOST_TEST_REQUIRE(threadReader.start()); +// +// std::atomic_bool dummy{false}; +// ml::core::CNamedPipeFactory::TFileP file{ +// ml::core::CNamedPipeFactory::openPipeFileWrite(TEST_PIPE_NAME, dummy)}; +// BOOST_TEST_REQUIRE(file); +// +// sleep(1); +// std::size_t charsLeft{TEST_SIZE}; +// std::size_t blockSize{7}; +// while (charsLeft > 0) { +// if (blockSize > charsLeft) { +// blockSize = charsLeft; +// } +// BOOST_TEST_REQUIRE(std::fputs(std::string(blockSize, TEST_CHAR).c_str(), +// file.get()) >= 0); +// charsLeft -= blockSize; +// } +// +// file.reset(); +// +// BOOST_TEST_REQUIRE(threadReader.waitForFinish()); +// BOOST_TEST_REQUIRE(threadReader.attemptsTaken() <= MAX_ATTEMPTS); +// BOOST_TEST_REQUIRE(threadReader.streamWentBad() == false); +// +// BOOST_REQUIRE_EQUAL(TEST_SIZE, threadReader.data().length()); +// BOOST_REQUIRE_EQUAL(std::string(TEST_SIZE, TEST_CHAR), threadReader.data()); +// } BOOST_AUTO_TEST_CASE(testCancelBlock) { CThreadBlockCanceller cancellerThread{ml::core::CThread::currentThreadId()}; diff --git a/lib/model/CMakeLists.txt b/lib/model/CMakeLists.txt index 37c8c1cc5c..73dd74e4d0 100644 --- a/lib/model/CMakeLists.txt +++ b/lib/model/CMakeLists.txt @@ -75,7 +75,7 @@ ml_add_library(MlModel SHARED CSampleCounts.cc CSearchKey.cc CSimpleCountDetector.cc - CSystemMemoryUsage.cc + CProcessMemoryUsage.cc CTokenListCategory.cc CTokenListDataCategorizerBase.cc CTokenListReverseSearchCreator.cc diff --git a/lib/model/CSystemMemoryUsage.cc b/lib/model/CProcessMemoryUsage.cc similarity index 52% rename from lib/model/CSystemMemoryUsage.cc rename to lib/model/CProcessMemoryUsage.cc index ee86e22f44..37fb296345 100644 --- a/lib/model/CSystemMemoryUsage.cc +++ b/lib/model/CProcessMemoryUsage.cc @@ -9,19 +9,13 @@ * limitation. */ -#include +#include namespace ml { namespace model { -// On platforms other than Linux the system memory usage is that provided - the estimated size of the models. -std::size_t CSystemMemoryUsage::operator()(std::size_t memSize) const { - return memSize; -} - -// On platforms other than Linux the system memory is estimated. It must be adjusted before being reported -std::size_t CSystemMemoryUsage::maybeAdjustUsage(std::size_t usage, - const TMemoryAdjuster& memAdjuster) { - return memAdjuster(usage); +// On platforms other than Linux the process memory usage is the estimated size of the models. +CProcessMemoryUsage::EMemoryStrategy CProcessMemoryUsage::memoryStrategy() { + return EMemoryStrategy::E_Estimated; } } } diff --git a/lib/model/CProcessMemoryUsage_Linux.cc b/lib/model/CProcessMemoryUsage_Linux.cc new file mode 100644 index 0000000000..8c5306e069 --- /dev/null +++ b/lib/model/CProcessMemoryUsage_Linux.cc @@ -0,0 +1,21 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the following additional limitation. Functionality enabled by the + * files subject to the Elastic License 2.0 may only be used in production when + * invoked by an Elasticsearch process with a license key installed that permits + * use of machine learning features. You may not use this file except in + * compliance with the Elastic License 2.0 and the foregoing additional + * limitation. + */ + +#include + +namespace ml { +namespace model { +// On Linux the process memory usage is determined by the OS. +CProcessMemoryUsage::EMemoryStrategy CProcessMemoryUsage::memoryStrategy() { + return EMemoryStrategy::E_System; +} +} +} diff --git a/lib/model/CResourceMonitor.cc b/lib/model/CResourceMonitor.cc index fff68a0014..6529fa4656 100644 --- a/lib/model/CResourceMonitor.cc +++ b/lib/model/CResourceMonitor.cc @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include @@ -380,13 +380,11 @@ CResourceMonitor::SModelSizeStats CResourceMonitor::createMemoryUsageReport(core_t::TTime bucketStartTime) { SModelSizeStats res; res.s_Usage = this->totalMemory(); - res.s_AdjustedUsage = this->adjustedUsage(res.s_Usage); + res.s_AdjustedUsage = this->applyMemoryStrategy(res.s_Usage); res.s_PeakUsage = static_cast( core::CProgramCounters::counter(counter_t::E_TSADPeakMemoryUsage)); - res.s_AdjustedPeakUsage = this->adjustedUsage(res.s_PeakUsage); - res.s_SystemMemoryUsage = core::CProcessStats::residentSetSize(); - res.s_MaxSystemMemoryUsage = core::CProcessStats::maxResidentSetSize(); - res.s_BytesMemoryLimit = this->persistenceMemoryIncreaseFactor() * m_ByteLimitHigh; + res.s_AdjustedPeakUsage = this->applyMemoryStrategy(res.s_PeakUsage); + res.s_BytesMemoryLimit = this->getBytesMemoryLimit(); res.s_BytesExceeded = m_CurrentBytesExceeded; res.s_MemoryStatus = m_MemoryStatus; std::uint64_t assignmentMemoryBasis{ @@ -404,24 +402,33 @@ CResourceMonitor::createMemoryUsageReport(core_t::TTime bucketStartTime) { return res; } -std::size_t CResourceMonitor::adjustedUsage(std::size_t usage) const { - const std::size_t persistenceMemoryIncreaseFactor = - this->persistenceMemoryIncreaseFactor(); +std::size_t CResourceMonitor::applyMemoryStrategy(std::size_t usage) const { + std::size_t modifiedUsage{0}; + switch (CProcessMemoryUsage::memoryStrategy()) { + case CProcessMemoryUsage::EMemoryStrategy::E_Estimated: { + modifiedUsage = this->adjustedUsage(usage); + break; + } + case CProcessMemoryUsage::EMemoryStrategy::E_System: { + modifiedUsage = core::CProcessStats::maxResidentSetSize(); + break; + } + default: { LOG_WARN(<< "Unknown memory strategy"); } + } + return modifiedUsage; +} - return CSystemMemoryUsage::maybeAdjustUsage(usage, [ - &byteLimitMargin = m_ByteLimitMargin, &persistenceMemoryIncreaseFactor - ](std::size_t usage_) { - // On platforms that estimate the memory usage, it is scaled by the inverse of the byte limit margin. - // This gives the user a fairer indication of how close the job is to hitting - // the model memory limit in a concise manner (as the limit is scaled down by - // the margin during the beginning period of the job's existence). - std::size_t adjustedUsage{ - static_cast(static_cast(usage_) / byteLimitMargin)}; +std::size_t CResourceMonitor::adjustedUsage(std::size_t usage) const { + // We scale the reported memory usage by the inverse of the byte limit margin. + // This gives the user a fairer indication of how close the job is to hitting + // the model memory limit in a concise manner (as the limit is scaled down by + // the margin during the beginning period of the job's existence). + std::size_t adjustedUsage{ + static_cast(static_cast(usage) / m_ByteLimitMargin)}; - adjustedUsage *= persistenceMemoryIncreaseFactor; + adjustedUsage *= this->persistenceMemoryIncreaseFactor(); - return adjustedUsage; - }); + return adjustedUsage; } std::size_t CResourceMonitor::persistenceMemoryIncreaseFactor() const { @@ -497,10 +504,9 @@ std::size_t CResourceMonitor::lowLimit() const { } std::size_t CResourceMonitor::totalMemory() const { - CSystemMemoryUsage systemMemoryUsage; - return systemMemoryUsage(m_MonitoredResourceCurrentMemory + m_ExtraMemory + - static_cast(core::CProgramCounters::counter( - counter_t::E_TSADOutputMemoryAllocatorUsage))); + return m_MonitoredResourceCurrentMemory + m_ExtraMemory + + static_cast(core::CProgramCounters::counter( + counter_t::E_TSADOutputMemoryAllocatorUsage)); } std::size_t CResourceMonitor::systemMemory() { diff --git a/lib/model/CSystemMemoryUsage_Linux.cc b/lib/model/CSystemMemoryUsage_Linux.cc deleted file mode 100644 index 76a3ca87a8..0000000000 --- a/lib/model/CSystemMemoryUsage_Linux.cc +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the Elastic License - * 2.0 and the following additional limitation. Functionality enabled by the - * files subject to the Elastic License 2.0 may only be used in production when - * invoked by an Elasticsearch process with a license key installed that permits - * use of machine learning features. You may not use this file except in - * compliance with the Elastic License 2.0 and the foregoing additional - * limitation. - */ - -#include - -#include - -namespace ml { -namespace model { -// On Linux the system memory usage is actually that determined by the OS. -// The estimated value provided is ignored. -std::size_t CSystemMemoryUsage::operator()(std::size_t /*memSize*/) const { - return core::CProcessStats::maxResidentSetSize(); -} - -std::size_t CSystemMemoryUsage::maybeAdjustUsage(std::size_t usage, - const TMemoryAdjuster& /*memAdjuster*/) { - // On Linux we use the actual system memory usage, rather tha an estimated value, so there is no need to adjust it. - return usage; -} -} -} diff --git a/lib/model/unittest/CResourceMonitorTest.cc b/lib/model/unittest/CResourceMonitorTest.cc index c17c644c80..b11c79aca5 100644 --- a/lib/model/unittest/CResourceMonitorTest.cc +++ b/lib/model/unittest/CResourceMonitorTest.cc @@ -549,10 +549,6 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory, m_ReportedModelSizeStats.s_PeakUsage); - BOOST_TEST_REQUIRE(baseTotalMemory <= m_ReportedModelSizeStats.s_SystemMemoryUsage); - BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= - m_ReportedModelSizeStats.s_SystemMemoryUsage); - monitor.addExtraMemory(100); monitor.updateMoments(monitor.totalMemory(), 0, 1); @@ -560,10 +556,6 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_PeakUsage); - BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); - BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= - m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); - monitor.addExtraMemory(-50); monitor.updateMoments(monitor.totalMemory(), 0, 1); @@ -571,20 +563,12 @@ BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) { BOOST_REQUIRE_EQUAL(baseTotalMemory + 50, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory + 100, m_ReportedModelSizeStats.s_PeakUsage); - BOOST_TEST_REQUIRE(baseTotalMemory + 100 <= m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); - BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= - m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); - monitor.addExtraMemory(100); monitor.updateMoments(monitor.totalMemory(), 0, 1); monitor.sendMemoryUsageReport(0, 1); BOOST_REQUIRE_EQUAL(baseTotalMemory + 150, m_ReportedModelSizeStats.s_Usage); BOOST_REQUIRE_EQUAL(baseTotalMemory + 150, m_ReportedModelSizeStats.s_PeakUsage); - - BOOST_TEST_REQUIRE(baseTotalMemory + 150 <= m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); - BOOST_TEST_REQUIRE(m_ReportedModelSizeStats.s_PeakUsage <= - m_ReportedModelSizeStats.s_MaxSystemMemoryUsage); } BOOST_FIXTURE_TEST_CASE(testUpdateMoments, CTestFixture) { From fa9c4fab8e46f12fa291bf4588687da9ae28e4fa Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Thu, 12 Jun 2025 14:46:20 +1200 Subject: [PATCH 20/49] Small tidy up of CProcessMemoryUsage.cc * Replace static method with static const value. --- include/model/CProcessMemoryUsage.h | 14 +++----------- lib/model/CProcessMemoryUsage.cc | 4 +--- lib/model/CProcessMemoryUsage_Linux.cc | 5 ++--- lib/model/CResourceMonitor.cc | 2 +- 4 files changed, 7 insertions(+), 18 deletions(-) diff --git a/include/model/CProcessMemoryUsage.h b/include/model/CProcessMemoryUsage.h index 6a449ebf84..8ea1a2057c 100644 --- a/include/model/CProcessMemoryUsage.h +++ b/include/model/CProcessMemoryUsage.h @@ -29,18 +29,10 @@ class MODEL_EXPORT CProcessMemoryUsage { public: enum class EMemoryStrategy { E_Estimated, E_System }; -public: - CProcessMemoryUsage() = default; - ~CProcessMemoryUsage() = default; - - CProcessMemoryUsage(const CProcessMemoryUsage&) = delete; - CProcessMemoryUsage(CProcessMemoryUsage&&) = delete; - CProcessMemoryUsage& operator=(const CProcessMemoryUsage&) = delete; - CProcessMemoryUsage& operator=(CProcessMemoryUsage&&) = delete; + static const EMemoryStrategy MEMORY_STRATEGY; - //! Return the strategy used to calculate the memory used by the current process. - //! \return Strategy used to calculate the process memory based on the current OS. - static EMemoryStrategy memoryStrategy(); +public: + CProcessMemoryUsage() = delete; }; } } diff --git a/lib/model/CProcessMemoryUsage.cc b/lib/model/CProcessMemoryUsage.cc index 37fb296345..6fafd92323 100644 --- a/lib/model/CProcessMemoryUsage.cc +++ b/lib/model/CProcessMemoryUsage.cc @@ -14,8 +14,6 @@ namespace ml { namespace model { // On platforms other than Linux the process memory usage is the estimated size of the models. -CProcessMemoryUsage::EMemoryStrategy CProcessMemoryUsage::memoryStrategy() { - return EMemoryStrategy::E_Estimated; -} +const CProcessMemoryUsage::EMemoryStrategy CProcessMemoryUsage::MEMORY_STRATEGY{EMemoryStrategy::E_Estimated}; } } diff --git a/lib/model/CProcessMemoryUsage_Linux.cc b/lib/model/CProcessMemoryUsage_Linux.cc index 8c5306e069..95ef5af712 100644 --- a/lib/model/CProcessMemoryUsage_Linux.cc +++ b/lib/model/CProcessMemoryUsage_Linux.cc @@ -13,9 +13,8 @@ namespace ml { namespace model { + // On Linux the process memory usage is determined by the OS. -CProcessMemoryUsage::EMemoryStrategy CProcessMemoryUsage::memoryStrategy() { - return EMemoryStrategy::E_System; -} +const CProcessMemoryUsage::EMemoryStrategy CProcessMemoryUsage::MEMORY_STRATEGY{EMemoryStrategy::E_System}; } } diff --git a/lib/model/CResourceMonitor.cc b/lib/model/CResourceMonitor.cc index 6529fa4656..205fd76fa7 100644 --- a/lib/model/CResourceMonitor.cc +++ b/lib/model/CResourceMonitor.cc @@ -404,7 +404,7 @@ CResourceMonitor::createMemoryUsageReport(core_t::TTime bucketStartTime) { std::size_t CResourceMonitor::applyMemoryStrategy(std::size_t usage) const { std::size_t modifiedUsage{0}; - switch (CProcessMemoryUsage::memoryStrategy()) { + switch (CProcessMemoryUsage::MEMORY_STRATEGY) { case CProcessMemoryUsage::EMemoryStrategy::E_Estimated: { modifiedUsage = this->adjustedUsage(usage); break; From 014be1ccb22d59995e7054c54c3987f6331cdc95 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 13 Jun 2025 10:44:11 +1200 Subject: [PATCH 21/49] Small tidy up * uncomment test code * formatting --- lib/core/unittest/CNamedPipeFactoryTest.cc | 60 +++++++++++----------- lib/model/CProcessMemoryUsage.cc | 3 +- lib/model/CProcessMemoryUsage_Linux.cc | 3 +- 3 files changed, 34 insertions(+), 32 deletions(-) diff --git a/lib/core/unittest/CNamedPipeFactoryTest.cc b/lib/core/unittest/CNamedPipeFactoryTest.cc index 446c978558..6ad24c5829 100644 --- a/lib/core/unittest/CNamedPipeFactoryTest.cc +++ b/lib/core/unittest/CNamedPipeFactoryTest.cc @@ -158,36 +158,36 @@ BOOST_AUTO_TEST_CASE(testServerIsCppWriter) { BOOST_REQUIRE_EQUAL(std::string(TEST_SIZE, TEST_CHAR), threadReader.data()); } -// BOOST_AUTO_TEST_CASE(testServerIsCWriter) { -// ml::test::CThreadDataReader threadReader{PAUSE_TIME_MS, MAX_ATTEMPTS, TEST_PIPE_NAME}; -// BOOST_TEST_REQUIRE(threadReader.start()); -// -// std::atomic_bool dummy{false}; -// ml::core::CNamedPipeFactory::TFileP file{ -// ml::core::CNamedPipeFactory::openPipeFileWrite(TEST_PIPE_NAME, dummy)}; -// BOOST_TEST_REQUIRE(file); -// -// sleep(1); -// std::size_t charsLeft{TEST_SIZE}; -// std::size_t blockSize{7}; -// while (charsLeft > 0) { -// if (blockSize > charsLeft) { -// blockSize = charsLeft; -// } -// BOOST_TEST_REQUIRE(std::fputs(std::string(blockSize, TEST_CHAR).c_str(), -// file.get()) >= 0); -// charsLeft -= blockSize; -// } -// -// file.reset(); -// -// BOOST_TEST_REQUIRE(threadReader.waitForFinish()); -// BOOST_TEST_REQUIRE(threadReader.attemptsTaken() <= MAX_ATTEMPTS); -// BOOST_TEST_REQUIRE(threadReader.streamWentBad() == false); -// -// BOOST_REQUIRE_EQUAL(TEST_SIZE, threadReader.data().length()); -// BOOST_REQUIRE_EQUAL(std::string(TEST_SIZE, TEST_CHAR), threadReader.data()); -// } +BOOST_AUTO_TEST_CASE(testServerIsCWriter) { + ml::test::CThreadDataReader threadReader{PAUSE_TIME_MS, MAX_ATTEMPTS, TEST_PIPE_NAME}; + BOOST_TEST_REQUIRE(threadReader.start()); + + std::atomic_bool dummy{false}; + ml::core::CNamedPipeFactory::TFileP file{ + ml::core::CNamedPipeFactory::openPipeFileWrite(TEST_PIPE_NAME, dummy)}; + BOOST_TEST_REQUIRE(file); + + sleep(1); + std::size_t charsLeft{TEST_SIZE}; + std::size_t blockSize{7}; + while (charsLeft > 0) { + if (blockSize > charsLeft) { + blockSize = charsLeft; + } + BOOST_TEST_REQUIRE(std::fputs(std::string(blockSize, TEST_CHAR).c_str(), + file.get()) >= 0); + charsLeft -= blockSize; + } + + file.reset(); + + BOOST_TEST_REQUIRE(threadReader.waitForFinish()); + BOOST_TEST_REQUIRE(threadReader.attemptsTaken() <= MAX_ATTEMPTS); + BOOST_TEST_REQUIRE(threadReader.streamWentBad() == false); + + BOOST_REQUIRE_EQUAL(TEST_SIZE, threadReader.data().length()); + BOOST_REQUIRE_EQUAL(std::string(TEST_SIZE, TEST_CHAR), threadReader.data()); +} BOOST_AUTO_TEST_CASE(testCancelBlock) { CThreadBlockCanceller cancellerThread{ml::core::CThread::currentThreadId()}; diff --git a/lib/model/CProcessMemoryUsage.cc b/lib/model/CProcessMemoryUsage.cc index 6fafd92323..176a9825d2 100644 --- a/lib/model/CProcessMemoryUsage.cc +++ b/lib/model/CProcessMemoryUsage.cc @@ -14,6 +14,7 @@ namespace ml { namespace model { // On platforms other than Linux the process memory usage is the estimated size of the models. -const CProcessMemoryUsage::EMemoryStrategy CProcessMemoryUsage::MEMORY_STRATEGY{EMemoryStrategy::E_Estimated}; +const CProcessMemoryUsage::EMemoryStrategy CProcessMemoryUsage::MEMORY_STRATEGY{ + EMemoryStrategy::E_Estimated}; } } diff --git a/lib/model/CProcessMemoryUsage_Linux.cc b/lib/model/CProcessMemoryUsage_Linux.cc index 95ef5af712..ff3a52151b 100644 --- a/lib/model/CProcessMemoryUsage_Linux.cc +++ b/lib/model/CProcessMemoryUsage_Linux.cc @@ -15,6 +15,7 @@ namespace ml { namespace model { // On Linux the process memory usage is determined by the OS. -const CProcessMemoryUsage::EMemoryStrategy CProcessMemoryUsage::MEMORY_STRATEGY{EMemoryStrategy::E_System}; +const CProcessMemoryUsage::EMemoryStrategy CProcessMemoryUsage::MEMORY_STRATEGY{ + EMemoryStrategy::E_System}; } } From 41c02defea5209dc5ef6e387308665785291bdf7 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Tue, 17 Jun 2025 15:43:30 +1200 Subject: [PATCH 22/49] On Linux, return system memory (max resident set size) from CResourceMonitor.totalMemory() While this does seem the correct thing to do, it does lead to a large number of broken unit test cases on Linux. These test failures fall largely into two groups * Those that fail due to the assumption that totalMemory returns an estimate of model memory, and as such totalMemory can go up and down in the space of a test. Compare this system memory, which by its nature will never decrease over the lifetime of a process. * And those that fail, not due to any inherent assumptions of there own, but due to the effects of other tests run prior in the process. These prior run tests increase the system memory value such that memory resource limits are now hit. In other words the test cases are now not truly independent of one another. Commiting this change now for full visibility and for a point of further discussion. --- lib/model/CResourceMonitor.cc | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lib/model/CResourceMonitor.cc b/lib/model/CResourceMonitor.cc index 205fd76fa7..83ec624b39 100644 --- a/lib/model/CResourceMonitor.cc +++ b/lib/model/CResourceMonitor.cc @@ -380,10 +380,10 @@ CResourceMonitor::SModelSizeStats CResourceMonitor::createMemoryUsageReport(core_t::TTime bucketStartTime) { SModelSizeStats res; res.s_Usage = this->totalMemory(); - res.s_AdjustedUsage = this->applyMemoryStrategy(res.s_Usage); + res.s_AdjustedUsage = this->adjustedUsage(res.s_Usage); res.s_PeakUsage = static_cast( core::CProgramCounters::counter(counter_t::E_TSADPeakMemoryUsage)); - res.s_AdjustedPeakUsage = this->applyMemoryStrategy(res.s_PeakUsage); + res.s_AdjustedPeakUsage = this->adjustedUsage(res.s_PeakUsage); res.s_BytesMemoryLimit = this->getBytesMemoryLimit(); res.s_BytesExceeded = m_CurrentBytesExceeded; res.s_MemoryStatus = m_MemoryStatus; @@ -406,7 +406,7 @@ std::size_t CResourceMonitor::applyMemoryStrategy(std::size_t usage) const { std::size_t modifiedUsage{0}; switch (CProcessMemoryUsage::MEMORY_STRATEGY) { case CProcessMemoryUsage::EMemoryStrategy::E_Estimated: { - modifiedUsage = this->adjustedUsage(usage); + modifiedUsage = usage; break; } case CProcessMemoryUsage::EMemoryStrategy::E_System: { @@ -504,9 +504,9 @@ std::size_t CResourceMonitor::lowLimit() const { } std::size_t CResourceMonitor::totalMemory() const { - return m_MonitoredResourceCurrentMemory + m_ExtraMemory + - static_cast(core::CProgramCounters::counter( - counter_t::E_TSADOutputMemoryAllocatorUsage)); + return this->applyMemoryStrategy(m_MonitoredResourceCurrentMemory + m_ExtraMemory + + static_cast(core::CProgramCounters::counter( + counter_t::E_TSADOutputMemoryAllocatorUsage))); } std::size_t CResourceMonitor::systemMemory() { From 50c535ffa9751713d9b1c0eff85dbb5710ba1cbb Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 27 Jun 2025 16:20:02 +1200 Subject: [PATCH 23/49] [ML] Add a script to run each unit test separately Add a script to provide a wrapper around the call to "cmake" that runs the test cases and provides some flexibility as to how the tests should be run in terms of how they are spread across processes. This is necessary when trying to isolate the impact memory usage of tests have upon one another. --- 3rd_party/3rd_party.cmake | 19 +++-- 3rd_party/CMakeLists.txt | 2 +- cmake/compiler/clang.cmake | 2 +- cmake/functions.cmake | 9 ++ cmake/test-runner.cmake | 53 ++++++++++-- dev-tools/docker/docker_entrypoint.sh | 2 +- generate_test_names.py | 77 ++++++++++++++++++ run_tests_as_seperate_processes.sh | 113 ++++++++++++++++++++++++++ test/CMakeLists.txt | 9 ++ 9 files changed, 269 insertions(+), 17 deletions(-) create mode 100755 generate_test_names.py create mode 100755 run_tests_as_seperate_processes.sh diff --git a/3rd_party/3rd_party.cmake b/3rd_party/3rd_party.cmake index 2c9d796221..5878d7f86b 100644 --- a/3rd_party/3rd_party.cmake +++ b/3rd_party/3rd_party.cmake @@ -25,6 +25,10 @@ if(NOT INSTALL_DIR) message(FATAL_ERROR "INSTALL_DIR not specified") endif() +STRING(REPLACE "//" "/" INSTALL_DIR ${INSTALL_DIR}) + +message(STATUS "3rd_party: CMAKE_CXX_COMPILER_VERSION_MAJOR=${CMAKE_CXX_COMPILER_VERSION_MAJOR}") + string(TOLOWER ${CMAKE_HOST_SYSTEM_NAME} HOST_SYSTEM_NAME) message(STATUS "3rd_party: HOST_SYSTEM_NAME=${HOST_SYSTEM_NAME}") @@ -43,7 +47,9 @@ set(ARCH ${HOST_SYSTEM_PROCESSOR}) if ("${HOST_SYSTEM_NAME}" STREQUAL "darwin") message(STATUS "3rd_party: Copying macOS 3rd party libraries") set(BOOST_LOCATION "/usr/local/lib") - set(BOOST_COMPILER "clang") + set(BOOST_COMPILER "clang-darwin${CMAKE_CXX_COMPILER_VERSION_MAJOR}") + message(STATUS "3rd_party: BOOST_COMPILER=${BOOST_COMPILER}") + if( "${ARCH}" STREQUAL "x86_64" ) set(BOOST_ARCH "x64") else() @@ -63,7 +69,7 @@ elseif ("${HOST_SYSTEM_NAME}" STREQUAL "linux") if(NOT DEFINED ENV{CPP_CROSS_COMPILE} OR "$ENV{CPP_CROSS_COMPILE}" STREQUAL "") message(STATUS "3rd_party: NOT cross compiling. Copying Linux 3rd party libraries") set(BOOST_LOCATION "/usr/local/gcc133/lib") - set(BOOST_COMPILER "gcc") + set(BOOST_COMPILER "gcc${CMAKE_CXX_COMPILER_VERSION_MAJOR}") if( "${ARCH}" STREQUAL "aarch64" ) set(BOOST_ARCH "a64") else() @@ -93,7 +99,7 @@ elseif ("${HOST_SYSTEM_NAME}" STREQUAL "linux") message(STATUS "3rd_party: Cross compile for macosx: Copying macOS 3rd party libraries") set(SYSROOT "/usr/local/sysroot-x86_64-apple-macosx10.14") set(BOOST_LOCATION "${SYSROOT}/usr/local/lib") - set(BOOST_COMPILER "clang") + set(BOOST_COMPILER "clang-darwin${CMAKE_CXX_COMPILER_VERSION_MAJOR}") set(BOOST_EXTENSION "mt-x64-1_86.dylib") set(BOOST_LIBRARIES "atomic" "chrono" "date_time" "filesystem" "iostreams" "log" "log_setup" "program_options" "regex" "system" "thread" "unit_test_framework") set(XML_LOCATION) @@ -108,7 +114,7 @@ elseif ("${HOST_SYSTEM_NAME}" STREQUAL "linux") message(STATUS "3rd_party: Cross compile for linux-aarch64: Copying Linux 3rd party libraries") set(SYSROOT "/usr/local/sysroot-$ENV{CPP_CROSS_COMPILE}-linux-gnu") set(BOOST_LOCATION "${SYSROOT}/usr/local/gcc133/lib") - set(BOOST_COMPILER "gcc") + set(BOOST_COMPILER "gcc${CMAKE_CXX_COMPILER_VERSION_MAJOR}") if("$ENV{CPP_CROSS_COMPILE}" STREQUAL "aarch64") set(BOOST_ARCH "a64") else() @@ -188,6 +194,9 @@ function(install_libs _target _source_dir _prefix _postfix) set(LIBRARIES ${ARGN}) + message(STATUS "_target=${_target} _source_dir=${_source_dir} _prefix=${_prefix} _postfix=${_postfix} LIBRARIES=${LIBRARIES}") + + file(GLOB _LIBS ${_source_dir}/*${_prefix}*${_postfix}) if(_LIBS) @@ -219,7 +228,7 @@ function(install_libs _target _source_dir _prefix _postfix) endif() file(CHMOD ${INSTALL_DIR}/${_LIB} PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) else() - file(COPY ${_RESOLVED_PATH} DESTINATION ${INSTALL_DIR}) + file(COPY ${_RESOLVED_PATH} DESTINATION "${INSTALL_DIR}") file(CHMOD ${INSTALL_DIR}/${_RESOLVED_LIB} PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) endif() endforeach() diff --git a/3rd_party/CMakeLists.txt b/3rd_party/CMakeLists.txt index ffdb8e093e..f2b092f913 100644 --- a/3rd_party/CMakeLists.txt +++ b/3rd_party/CMakeLists.txt @@ -22,7 +22,7 @@ add_custom_target(licenses ALL # as part of the CMake configuration step - avoiding # the need for it to be done on every build execute_process( - COMMAND ${CMAKE_COMMAND} -DINSTALL_DIR=${INSTALL_DIR} -P ./3rd_party.cmake + COMMAND ${CMAKE_COMMAND} -DINSTALL_DIR=${INSTALL_DIR} -DCMAKE_CXX_COMPILER_VERSION_MAJOR=${CMAKE_CXX_COMPILER_VERSION_MAJOR} -P ./3rd_party.cmake WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} ) diff --git a/cmake/compiler/clang.cmake b/cmake/compiler/clang.cmake index 1749ad0a89..cc4042dbc7 100644 --- a/cmake/compiler/clang.cmake +++ b/cmake/compiler/clang.cmake @@ -16,7 +16,7 @@ set(CMAKE_RANLIB "ranlib") set(CMAKE_STRIP "strip") -list(APPEND ML_C_FLAGS +list(APPEND ML_C_FLAGS ${CROSS_FLAGS} ${ARCHCFLAGS} "-fstack-protector" diff --git a/cmake/functions.cmake b/cmake/functions.cmake index c39a86089a..ea8070a712 100644 --- a/cmake/functions.cmake +++ b/cmake/functions.cmake @@ -392,6 +392,13 @@ function(ml_add_test_executable _target) COMMENT "Running test: ml_test_${_target}" WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} ) + + add_custom_target(test_${_target}_individually + DEPENDS ml_test_${_target} + COMMAND ${CMAKE_SOURCE_DIR}/run_tests_as_seperate_processes.sh ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR} test_${_target} + COMMENT "Running test: ml_test_${_target}_individually" + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + ) endif() endfunction() @@ -420,8 +427,10 @@ function(ml_add_test _directory _target) add_subdirectory(../${_directory} ${_directory}) list(APPEND ML_BUILD_TEST_DEPENDS ml_test_${_target}) list(APPEND ML_TEST_DEPENDS test_${_target}) + list(APPEND ML_TEST_INDIVIDUALLY_DEPENDS test_${_target}_individually) set(ML_BUILD_TEST_DEPENDS ${ML_BUILD_TEST_DEPENDS} PARENT_SCOPE) set(ML_TEST_DEPENDS ${ML_TEST_DEPENDS} PARENT_SCOPE) + set(ML_TEST_INDIVIDUALLY_DEPENDS ${ML_TEST_INDIVIDUALLY_DEPENDS} PARENT_SCOPE) endfunction() diff --git a/cmake/test-runner.cmake b/cmake/test-runner.cmake index 2bba0cb5c4..7c13d59f0c 100644 --- a/cmake/test-runner.cmake +++ b/cmake/test-runner.cmake @@ -9,6 +9,13 @@ # limitation. # +execute_process(COMMAND ${CMAKE_COMMAND} -E rm -f ${TEST_DIR}/*.out) +execute_process(COMMAND ${CMAKE_COMMAND} -E rm -f ${TEST_DIR}/*.failed) +execute_process(COMMAND ${CMAKE_COMMAND} -E rm -f boost_test_results*.xml) +execute_process(COMMAND ${CMAKE_COMMAND} -E rm -f boost_test_results*.junit) + +set(INDIVIDUAL_TEST "CAnnotationJsonWriterTest/testWrite") + if(TEST_NAME STREQUAL "ml_test_seccomp") execute_process(COMMAND ${TEST_DIR}/${TEST_NAME} $ENV{BOOST_TEST_OUTPUT_FORMAT_FLAGS} --logger=HRF,all --report_format=HRF --show_progress=no --no_color_output OUTPUT_FILE ${TEST_DIR}/${TEST_NAME}.out ERROR_FILE ${TEST_DIR}/${TEST_NAME}.out RESULT_VARIABLE TEST_SUCCESS) else() @@ -17,22 +24,50 @@ else() string(REPLACE " " ";" TEST_FLAGS $ENV{TEST_FLAGS}) endif() + + set(SAFE_TEST_NAME "") + set(OUTPUT_FILE "${TEST_DIR}/${TEST_NAME}.out") + set(FAILED_FILE "${TEST_DIR}/${TEST_NAME}.failed") # Special case for specifying a subset of tests to run (can be regex) if (DEFINED ENV{TESTS} AND NOT "$ENV{TESTS}" STREQUAL "") set(TESTS "--run_test=$ENV{TESTS}") + string(REGEX REPLACE "[^a-zA-Z0-9_]" "_" SAFE_TEST_NAME "$ENV{TESTS}") + set(SAFE_TEST_NAME "_${SAFE_TEST_NAME}") endif() - # If any special command line args are present run the tests in the foreground + # If env var RUN_BOOST_TESTS_IN_BACKGROUND is defined run the tests in the background + message(STATUS "RUN_BOOST_TESTS_IN_BACKGROUND=$ENV{RUN_BOOST_TESTS_IN_BACKGROUND}") + if (DEFINED TEST_FLAGS OR DEFINED TESTS) - message(STATUS "executing process ${TEST_DIR}/${TEST_NAME} ${TEST_FLAGS} ${TESTS} $ENV{BOOST_TEST_OUTPUT_FORMAT_FLAGS}") - execute_process(COMMAND ${TEST_DIR}/${TEST_NAME} ${TEST_FLAGS} ${TESTS} $ENV{BOOST_TEST_OUTPUT_FORMAT_FLAGS} RESULT_VARIABLE TEST_SUCCESS) + string(REPLACE "boost_test_results" "boost_test_results${SAFE_TEST_NAME}" BOOST_TEST_OUTPUT_FORMAT_FLAGS "$ENV{BOOST_TEST_OUTPUT_FORMAT_FLAGS}") + set(OUTPUT_FILE "${TEST_DIR}/${TEST_NAME}${SAFE_TEST_NAME}.out") + set(FAILED_FILE "${TEST_DIR}/${TEST_NAME}${SAFE_TEST_NAME}.failed") + + if(DEFINED ENV{RUN_BOOST_TESTS_IN_BACKGROUND}) + message(STATUS "executing process ${TEST_DIR}/${TEST_NAME} ${TEST_FLAGS} ${TESTS} ${BOOST_TEST_OUTPUT_FORMAT_FLAGS} --no_color_output") + execute_process(COMMAND ${TEST_DIR}/${TEST_NAME} ${TEST_FLAGS} ${TESTS} ${BOOST_TEST_OUTPUT_FORMAT_FLAGS} --no_color_output OUTPUT_FILE ${OUTPUT_FILE} ERROR_FILE ${OUTPUT_FILE} RESULT_VARIABLE TEST_SUCCESS) + message(STATUS "TESTS EXITED WITH SUCCESS ${TEST_SUCCESS}") + else() + message(STATUS "executing process ${TEST_DIR}/${TEST_NAME} ${TEST_FLAGS} ${TESTS} ${BOOST_TEST_OUTPUT_FORMAT_FLAGS}") + execute_process(COMMAND ${TEST_DIR}/${TEST_NAME} ${TEST_FLAGS} ${TESTS} ${BOOST_TEST_OUTPUT_FORMAT_FLAGS} RESULT_VARIABLE TEST_SUCCESS) + message(STATUS "TESTS EXITED WITH SUCCESS ${TEST_SUCCESS}") + endif() else() - execute_process(COMMAND ${TEST_DIR}/${TEST_NAME} $ENV{TEST_FLAGS} $ENV{BOOST_TEST_OUTPUT_FORMAT_FLAGS} - --no_color_output OUTPUT_FILE ${TEST_DIR}/${TEST_NAME}.out ERROR_FILE ${TEST_DIR}/${TEST_NAME}.out RESULT_VARIABLE TEST_SUCCESS) + if(DEFINED ENV{RUN_BOOST_TESTS_IN_BACKGROUND}) + execute_process(COMMAND ${TEST_DIR}/${TEST_NAME} $ENV{TEST_FLAGS} $ENV{BOOST_TEST_OUTPUT_FORMAT_FLAGS} + --no_color_output OUTPUT_FILE ${OUTPUT_FILE} ERROR_FILE ${OUTPUT_FILE} RESULT_VARIABLE TEST_SUCCESS) + else() + execute_process(COMMAND ${TEST_DIR}/${TEST_NAME} ${TEST_FLAGS} ${TESTS} ${BOOST_TEST_OUTPUT_FORMAT_FLAGS} RESULT_VARIABLE TEST_SUCCESS) + endif() + endif() + + if (NOT TEST_SUCCESS EQUAL 0) + if (EXISTS ${TEST_DIR}/${TEST_NAME}) + execute_process(COMMAND ${CMAKE_COMMAND} -E cat ${OUTPUT_FILE}) + file(WRITE "${TEST_DIR}/${FAILED_FILE}" "") + endif() + message(FATAL_ERROR "Exiting with status ${TEST_SUCCESS}") endif() -endif() -if (NOT TEST_SUCCESS EQUAL 0) - execute_process(COMMAND ${CMAKE_COMMAND} -E cat ${TEST_DIR}/${TEST_NAME}.out) - file(WRITE "${TEST_DIR}/${TEST_NAME}.failed" "") endif() + diff --git a/dev-tools/docker/docker_entrypoint.sh b/dev-tools/docker/docker_entrypoint.sh index 475c05344f..dba029e51d 100755 --- a/dev-tools/docker/docker_entrypoint.sh +++ b/dev-tools/docker/docker_entrypoint.sh @@ -66,6 +66,6 @@ if [ "x$1" = "x--test" ] ; then # failure is the unit tests, and then the detailed test results can be # copied from the image echo passed > build/test_status.txt - cmake --build cmake-build-docker ${CMAKE_VERBOSE} -j`nproc` -t test || echo failed > build/test_status.txt + cmake --build cmake-build-docker ${CMAKE_VERBOSE} -j`nproc` -t test_individually || echo failed > build/test_status.txt fi diff --git a/generate_test_names.py b/generate_test_names.py new file mode 100755 index 0000000000..805c169d6e --- /dev/null +++ b/generate_test_names.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python3 +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License +# 2.0 and the following additional limitation. Functionality enabled by the +# files subject to the Elastic License 2.0 may only be used in production when +# invoked by an Elasticsearch process with a license key installed that permits +# use of machine learning features. You may not use this file except in +# compliance with the Elastic License 2.0 and the foregoing additional +# limitation. + +# This script provides a wrapper around a call to a BOOST test executable +# to return a formatted list of tests such that each fully qualified test +# name would be in a form suitable to being passed to BOOST test's "--run_test" +# parameter. +# It takes precisely one positional parameter, the path to a BOOST test executable. + + +import argparse +import re +import subprocess +import sys + + +def parse_arguments(): + parser = argparse.ArgumentParser() + parser.add_argument('exec_path', help='The path to the ml_test suite executable') + return parser.parse_args() + +def get_qualified_test_names(executable_path): + + cmd = [args.exec_path, "--list_content"] + process = subprocess.run(cmd, capture_output=True, text=True, check=True) + output_lines = process.stderr.splitlines() + + test_names = [] + current_suite_stack = [] + + for line in output_lines: + match_suite = re.match(r'^( *)(C.*Test)\*$', line) + match_case = re.match(r'^( *)(test.*)\*$', line) + + if match_suite: + indent_level = len(match_suite.group(1)) + suite_name = match_suite.group(2) + + # Pop suites from stack if current indent is less or equal + while current_suite_stack and len(current_suite_stack[-1][0]) >= indent_level: + current_suite_stack.pop() + + current_suite_stack.append((match_suite.group(1), suite_name)) + elif match_case: + indent_level = len(match_case.group(1)) + case_name = match_case.group(2) + + # Pop suites from stack if current indent is less (for sibling suites/cases) + while current_suite_stack and len(current_suite_stack[-1][0]) >= indent_level: + current_suite_stack.pop() + + full_path = "/".join([s[1] for s in current_suite_stack] + [case_name]) + test_names.append(full_path) + return test_names + +if __name__ == "__main__": + args = parse_arguments() + try: + names = get_qualified_test_names(args.exec_path) + for name in names: + print(name) + except subprocess.CalledProcessError as e: + print(f"Error listing tests: {e.stderr}", file=sys.stderr) + sys.exit(1) + except FileNotFoundError: + print(f"Error: Test executable '{args.exec_path}' not found.", file=sys.stderr) + sys.exit(1) + except Exception as e: + print(f"An unexpected error occurred: {e}", file=sys.stderr) + sys.exit(1) diff --git a/run_tests_as_seperate_processes.sh b/run_tests_as_seperate_processes.sh new file mode 100755 index 0000000000..6f565ce7c6 --- /dev/null +++ b/run_tests_as_seperate_processes.sh @@ -0,0 +1,113 @@ +#!/bin/bash +# +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License +# 2.0 and the following additional limitation. Functionality enabled by the +# files subject to the Elastic License 2.0 may only be used in production when +# invoked by an Elasticsearch process with a license key installed that permits +# use of machine learning features. You may not use this file except in +# compliance with the Elastic License 2.0 and the foregoing additional +# limitation. +# + +# This script ultimately gets called from within the docker entry point script. +# It provides a wrapper around the call to "cmake" that runs the test cases +# and provides some flexibility as to how the tests should be run in terms of how they +# are spread across processes. This is necessary when trying to isolate the impact memory +# usage of tests have upon one another. +# +# It is intended to be called as part of the CI build/test process but should be able to be run manually. +# +# It should be called with 3 parameters +# cmake_build_dir: The directory that cmake is using for build outputs, i.e. that passed to cmake's --build argument +# cmake_current_binary_dir: The directory containing the current test suite executable e.g. /test/lib/api/unittest +# test_suite: The name of the test suite to run, minus any leading "ml_", e.g. "test_api" +# +# In addition to the required parameters there are several environment variables that control the script's behaviour +# BOOST_TEST_MAX_ARGS: The maximum number of test cases to be passed off to a sub shell +# BOOST_TEST_MAX_PROCS: The maximum number of sub shells to use +# BOOST_TEST_MIXED_MODE: If set to "true" then rather than iterating over each individual test passed to a sub-shell +# run them all in the same BOOST test executable process. +# + +if [ $# -lt 3 ]; then + echo "Usage: $0 " + echo "e.g.: $0 ${CPP_SRC_HOME}/cmake-build-relwithdebinfo-local ${CPP_SRC_HOME}/cmake-build-relwithdebinfo-local/test/lib/api/unittest test_api" + exit +fi + +export BUILD_DIR=$1 +export BINARY_DIR=$2 +export TEST_SUITE=$3 + +export TEST_EXECUTABLE="$2/ml_$3" +export LOG_DIR="$2/test_logs" + +MAX_ARGS=2 +MAX_PROCS=4 + +if [[ -n "$BOOST_TEST_MAX_ARGS" ]]; then + MAX_ARGS=$BOOST_TEST_MAX_ARGS +fi + +if [[ -n "$BOOST_TEST_MAX_PROCS" ]]; then + MAX_PROCS=$BOOST_TEST_MAX_PROCS +fi + +rm -rf "$LOG_DIR" +mkdir -p "$LOG_DIR" + +echo "Discovering tests..." +# Use the Python script to get the fully qualified test names +ALL_TEST_NAMES=$(python3 ${CPP_SRC_HOME}/generate_test_names.py "$TEST_EXECUTABLE") + +if [ -z "$ALL_TEST_NAMES" ]; then + echo "No tests found to run or error in test discovery." + exit 1 +fi + +EXIT_CODE=0 +export RUN_BOOST_TESTS_IN_BACKGROUND=1 + +function execute_tests() { + + if [[ "$BOOST_TEST_MIXED_MODE" == "true" ]]; then + TEST_CASES=$(sed 's/ /:/g' <<< $@) + else + TEST_CASES=$@ + fi + + # Loop through each test + for TEST_NAME in $TEST_CASES; do + echo "--------------------------------------------------" + echo "Running test: $TEST_NAME" + + # Replace slashes and potentially other special chars for a safe filename + SAFE_TEST_LOG_FILENAME=$(echo "$TEST_NAME" | sed 's/[^a-zA-Z0-9_]/_/g' | cut -c-100) + LOG_FILE="$LOG_DIR/${SAFE_TEST_LOG_FILENAME}.log" + + # Execute the test in a separate process + TESTS=$TEST_NAME cmake --build $BUILD_DIR -t $TEST_SUITE > "$LOG_FILE" 2>&1 + TEST_STATUS=$? + + if [ $TEST_STATUS -eq 0 ]; then + echo "Test '$TEST_NAME' PASSED." + else + echo "Test '$TEST_NAME' FAILED with exit code $TEST_STATUS. Check '$LOG_FILE' for details." + EXIT_CODE=1 # Indicate overall failure if any test fails + fi + done +} + +export -f execute_tests + +echo $ALL_TEST_NAMES | xargs -n $MAX_ARGS -P $MAX_PROCS bash -c 'execute_tests "$@"' _ + +echo "--------------------------------------------------" +if [ $EXIT_CODE -eq 0 ]; then + echo "All individual tests PASSED." +else + echo "Some individual tests FAILED. Check logs in '$LOG_DIR'." +fi + +exit $EXIT_CODE diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 1877e64b5e..b96518a8a9 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -31,8 +31,17 @@ add_custom_target(run_tests DEPENDS clean_test_results ${ML_TEST_DEPENDS} WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} ) +add_custom_target(run_tests_individually + DEPENDS clean_test_results ${ML_TEST_INDIVIDUALLY_DEPENDS} + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} +) add_custom_target(test DEPENDS run_tests COMMAND ${CMAKE_COMMAND} -DTEST_DIR=${CMAKE_BINARY_DIR} -P ${CMAKE_SOURCE_DIR}/cmake/test-check-success.cmake WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} ) +add_custom_target(test_individually + DEPENDS run_tests_individually + COMMAND ${CMAKE_COMMAND} -DTEST_DIR=${CMAKE_BINARY_DIR} -P ${CMAKE_SOURCE_DIR}/cmake/test-check-success.cmake + WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} +) \ No newline at end of file From 1c5c46e00ddd94a440cf0bff79df00fc23205cd8 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Mon, 30 Jun 2025 11:40:11 +1200 Subject: [PATCH 24/49] Slight tweak to reduce load on linux builds --- dev-tools/docker/docker_entrypoint.sh | 2 +- dev-tools/docker_test.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dev-tools/docker/docker_entrypoint.sh b/dev-tools/docker/docker_entrypoint.sh index dba029e51d..1585356c44 100755 --- a/dev-tools/docker/docker_entrypoint.sh +++ b/dev-tools/docker/docker_entrypoint.sh @@ -66,6 +66,6 @@ if [ "x$1" = "x--test" ] ; then # failure is the unit tests, and then the detailed test results can be # copied from the image echo passed > build/test_status.txt - cmake --build cmake-build-docker ${CMAKE_VERBOSE} -j`nproc` -t test_individually || echo failed > build/test_status.txt + cmake --build cmake-build-docker ${CMAKE_VERBOSE} -j $(( `nproc`/2 )) -t test_individually || echo failed > build/test_status.txt fi diff --git a/dev-tools/docker_test.sh b/dev-tools/docker_test.sh index aed18fa609..44a972b6c7 100755 --- a/dev-tools/docker_test.sh +++ b/dev-tools/docker_test.sh @@ -25,7 +25,7 @@ usage() { } PLATFORMS= -EXTRACT_FIND="-name boost_test_results.xml -o -name boost_test_results.junit" +EXTRACT_FIND="-name boost_test_results\*.xml -o -name boost_test_results\*.junit" EXTRACT_EXPLICIT="build/distributions build/test_status.txt" while [ -n "$1" ] From 0109cce78d8bce18fdf185d3705607286ab59271 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Mon, 30 Jun 2025 17:00:37 +1200 Subject: [PATCH 25/49] Replace use of python script with bash function --- run_tests_as_seperate_processes.sh | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/run_tests_as_seperate_processes.sh b/run_tests_as_seperate_processes.sh index 6f565ce7c6..7715bdd57a 100755 --- a/run_tests_as_seperate_processes.sh +++ b/run_tests_as_seperate_processes.sh @@ -57,9 +57,28 @@ fi rm -rf "$LOG_DIR" mkdir -p "$LOG_DIR" +function get_qualified_test_names() { + executable_path=$1 + + output_lines=$($executable_path --list_content 2>&1) + + while IFS= read -r line; do + match=$(grep -w '^[ ]*C.*Test' <<< "$line"); + if [ $? -eq 0 ]; then + suite=$match + continue + fi + match=$(grep -w 'test.*\*$' <<< "$line"); + if [ $? -eq 0 ]; then + case=$(sed 's/[ \*]//g' <<< "$suite/$match") + echo "$case" + fi + done <<< "$output_lines" +} + +# get the fully qualified test names echo "Discovering tests..." -# Use the Python script to get the fully qualified test names -ALL_TEST_NAMES=$(python3 ${CPP_SRC_HOME}/generate_test_names.py "$TEST_EXECUTABLE") +ALL_TEST_NAMES=$(get_qualified_test_names "$TEST_EXECUTABLE") if [ -z "$ALL_TEST_NAMES" ]; then echo "No tests found to run or error in test discovery." From 01c1e6b4835ed5373555c09c97b7ed4084c8c71a Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Tue, 1 Jul 2025 10:22:11 +1200 Subject: [PATCH 26/49] Pass all JUNIT result files to test collector --- .buildkite/pipelines/build_linux.json.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.buildkite/pipelines/build_linux.json.py b/.buildkite/pipelines/build_linux.json.py index 56c6002aaf..78fc7afea9 100755 --- a/.buildkite/pipelines/build_linux.json.py +++ b/.buildkite/pipelines/build_linux.json.py @@ -72,10 +72,9 @@ def main(args): "RUN_TESTS": "true", "BOOST_TEST_OUTPUT_FORMAT_FLAGS": "--logger=JUNIT,error,boost_test_results.junit", }, - "artifact_paths": "*/**/unittest/boost_test_results.junit", "plugins": { "test-collector#v1.2.0": { - "files": "*/*/unittest/boost_test_results.junit", + "files": "*/*/unittest/boost_test_results_*.junit", "format": "junit" } }, From e2d2d17ef58fd81b17b49920558beb5606ea9dbb Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Wed, 2 Jul 2025 16:27:47 +1200 Subject: [PATCH 27/49] Better reporting of unit test results Combine multiple JUNIT results files from individual test runs into one file, omitting any skipped tests --- merge_results.sh | 41 ++++++++++++++++++++++++++++++ run_tests_as_seperate_processes.sh | 16 +++++++----- 2 files changed, 50 insertions(+), 7 deletions(-) create mode 100755 merge_results.sh diff --git a/merge_results.sh b/merge_results.sh new file mode 100755 index 0000000000..c1d5446091 --- /dev/null +++ b/merge_results.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# +# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one +# or more contributor license agreements. Licensed under the Elastic License +# 2.0 and the following additional limitation. Functionality enabled by the +# files subject to the Elastic License 2.0 may only be used in production when +# invoked by an Elasticsearch process with a license key installed that permits +# use of machine learning features. You may not use this file except in +# compliance with the Elastic License 2.0 and the foregoing additional +# limitation. +# + +# This script amalgamates multiple JUNIT results files from individual tests +# into one, omitting any test cases that have been skipped. The result is +# output to stdout. + +if [ $# -lt 1 ]; then + echo "Usage: $0 " + exit 1 +fi + +JUNIT_FILES="$@" + +echo "" +cat $JUNIT_FILES | \ + gawk -n ' + BEGIN{tests=0; skipped=0; errors=0; failures=0; id=""; time=0.0; name=""} + { + where=match($0, /"}' + +cat $JUNIT_FILES | sed -e '/xml/d' -e '/testsuite/d' -e '//{H;d;};x;/skipped/d' | grep '.' +echo "" +echo + diff --git a/run_tests_as_seperate_processes.sh b/run_tests_as_seperate_processes.sh index 7715bdd57a..601fc389c1 100755 --- a/run_tests_as_seperate_processes.sh +++ b/run_tests_as_seperate_processes.sh @@ -20,7 +20,7 @@ # # It should be called with 3 parameters # cmake_build_dir: The directory that cmake is using for build outputs, i.e. that passed to cmake's --build argument -# cmake_current_binary_dir: The directory containing the current test suite executable e.g. /test/lib/api/unittest +# unit_test_dir: The relative directory containing the current test suite e.g. lib/api/unittest # test_suite: The name of the test suite to run, minus any leading "ml_", e.g. "test_api" # # In addition to the required parameters there are several environment variables that control the script's behaviour @@ -32,16 +32,16 @@ if [ $# -lt 3 ]; then echo "Usage: $0 " - echo "e.g.: $0 ${CPP_SRC_HOME}/cmake-build-relwithdebinfo-local ${CPP_SRC_HOME}/cmake-build-relwithdebinfo-local/test/lib/api/unittest test_api" + echo "e.g.: $0 ${CPP_SRC_HOME}/cmake-build-relwithdebinfo-local lib/api/unittest test_api" exit fi export BUILD_DIR=$1 -export BINARY_DIR=$2 +export TEST_DIR=$2 export TEST_SUITE=$3 -export TEST_EXECUTABLE="$2/ml_$3" -export LOG_DIR="$2/test_logs" +export TEST_EXECUTABLE="$1/test/$2/ml_$3" +export LOG_DIR="$1/test/$2/test_logs" MAX_ARGS=2 MAX_PROCS=4 @@ -124,9 +124,11 @@ echo $ALL_TEST_NAMES | xargs -n $MAX_ARGS -P $MAX_PROCS bash -c 'execute_tests " echo "--------------------------------------------------" if [ $EXIT_CODE -eq 0 ]; then - echo "All individual tests PASSED." + echo "$TEST_SUITE: All individual tests PASSED." else - echo "Some individual tests FAILED. Check logs in '$LOG_DIR'." + echo "$TEST_SUITE: Some individual tests FAILED. Check logs in '$LOG_DIR'." fi +./merge_results.sh $TEST_DIR/boost_test_results_C*.junit > $TEST_DIR/boost_test_results.junit + exit $EXIT_CODE From 7904e76ebb74cfd14c2049f0096f401b67f72697 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Wed, 2 Jul 2025 17:07:11 +1200 Subject: [PATCH 28/49] Slight tweak to parameters to test script --- run_tests_as_seperate_processes.sh | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/run_tests_as_seperate_processes.sh b/run_tests_as_seperate_processes.sh index 601fc389c1..0bb718526a 100755 --- a/run_tests_as_seperate_processes.sh +++ b/run_tests_as_seperate_processes.sh @@ -20,7 +20,7 @@ # # It should be called with 3 parameters # cmake_build_dir: The directory that cmake is using for build outputs, i.e. that passed to cmake's --build argument -# unit_test_dir: The relative directory containing the current test suite e.g. lib/api/unittest +# cmake_current_binary_dir: The directory containing the current test suite executable e.g. /test/lib/api/unittest # test_suite: The name of the test suite to run, minus any leading "ml_", e.g. "test_api" # # In addition to the required parameters there are several environment variables that control the script's behaviour @@ -32,16 +32,18 @@ if [ $# -lt 3 ]; then echo "Usage: $0 " - echo "e.g.: $0 ${CPP_SRC_HOME}/cmake-build-relwithdebinfo-local lib/api/unittest test_api" + echo "e.g.: $0 ${CPP_SRC_HOME}/cmake-build-relwithdebinfo-local ${CPP_SRC_HOME}/cmake-build-relwithdebinfo-local/test/lib/api/unittest test_api" exit fi export BUILD_DIR=$1 -export TEST_DIR=$2 +export BINARY_DIR=$2 export TEST_SUITE=$3 -export TEST_EXECUTABLE="$1/test/$2/ml_$3" -export LOG_DIR="$1/test/$2/test_logs" +TEST_DIR=$(echo $BINARY_DIR | sed "s|$BUILD_DIR/test/||") + +export TEST_EXECUTABLE="$2/ml_$3" +export LOG_DIR="$2/test_logs" MAX_ARGS=2 MAX_PROCS=4 @@ -124,9 +126,9 @@ echo $ALL_TEST_NAMES | xargs -n $MAX_ARGS -P $MAX_PROCS bash -c 'execute_tests " echo "--------------------------------------------------" if [ $EXIT_CODE -eq 0 ]; then - echo "$TEST_SUITE: All individual tests PASSED." + echo "All individual tests PASSED." else - echo "$TEST_SUITE: Some individual tests FAILED. Check logs in '$LOG_DIR'." + echo "Some individual tests FAILED. Check logs in '$LOG_DIR'." fi ./merge_results.sh $TEST_DIR/boost_test_results_C*.junit > $TEST_DIR/boost_test_results.junit From 185a0e09bf4daf70d156bb1c3d75346e1781450f Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Thu, 3 Jul 2025 09:44:52 +1200 Subject: [PATCH 29/49] Tweaks and typos --- .buildkite/pipelines/build_linux.json.py | 2 +- run_tests_as_seperate_processes.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.buildkite/pipelines/build_linux.json.py b/.buildkite/pipelines/build_linux.json.py index 78fc7afea9..efe313b155 100755 --- a/.buildkite/pipelines/build_linux.json.py +++ b/.buildkite/pipelines/build_linux.json.py @@ -74,7 +74,7 @@ def main(args): }, "plugins": { "test-collector#v1.2.0": { - "files": "*/*/unittest/boost_test_results_*.junit", + "files": "*/*/unittest/boost_test_results.junit", "format": "junit" } }, diff --git a/run_tests_as_seperate_processes.sh b/run_tests_as_seperate_processes.sh index 0bb718526a..4d15de5ee7 100755 --- a/run_tests_as_seperate_processes.sh +++ b/run_tests_as_seperate_processes.sh @@ -126,9 +126,9 @@ echo $ALL_TEST_NAMES | xargs -n $MAX_ARGS -P $MAX_PROCS bash -c 'execute_tests " echo "--------------------------------------------------" if [ $EXIT_CODE -eq 0 ]; then - echo "All individual tests PASSED." + echo "$TEST_SUITE: All individual tests PASSED." else - echo "Some individual tests FAILED. Check logs in '$LOG_DIR'." + echo "$TEST_SUITE: Some individual tests FAILED. Check logs in '$LOG_DIR'." fi ./merge_results.sh $TEST_DIR/boost_test_results_C*.junit > $TEST_DIR/boost_test_results.junit From 0a6a08695466692811ca71cc30f0fa3df5666649 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Thu, 3 Jul 2025 14:46:44 +1200 Subject: [PATCH 30/49] Portability fixes --- run_tests_as_seperate_processes.sh | 38 ++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/run_tests_as_seperate_processes.sh b/run_tests_as_seperate_processes.sh index 4d15de5ee7..8011bc0fe7 100755 --- a/run_tests_as_seperate_processes.sh +++ b/run_tests_as_seperate_processes.sh @@ -29,6 +29,9 @@ # BOOST_TEST_MIXED_MODE: If set to "true" then rather than iterating over each individual test passed to a sub-shell # run them all in the same BOOST test executable process. # +# Design decisions: The script relies upon the simplest tools available on most unix like platforms - bash, sed and +# awk (the awk script does not use any GNU extensions for maximum portability). This is to keep the number of dependencies +# required by CI build images to a minimum (so e.g. no python etc.) if [ $# -lt 3 ]; then echo "Usage: $0 " @@ -40,7 +43,7 @@ export BUILD_DIR=$1 export BINARY_DIR=$2 export TEST_SUITE=$3 -TEST_DIR=$(echo $BINARY_DIR | sed "s|$BUILD_DIR/test/||") +TEST_DIR=${CPP_SRC_HOME}/$(echo $BINARY_DIR | sed "s|$BUILD_DIR/test/||") export TEST_EXECUTABLE="$2/ml_$3" export LOG_DIR="$2/test_logs" @@ -131,6 +134,37 @@ else echo "$TEST_SUITE: Some individual tests FAILED. Check logs in '$LOG_DIR'." fi -./merge_results.sh $TEST_DIR/boost_test_results_C*.junit > $TEST_DIR/boost_test_results.junit +function merge_junit_results() { + JUNIT_FILES="$@" + echo "" + cat $JUNIT_FILES | \ + awk ' + BEGIN{tests=0; skipped=0; errors=0; failures=0; id=""; time=0.0; name=""} + $0 ~ /"}' + + cat $JUNIT_FILES | sed -e '/xml/d' -e '/testsuite/d' -e '//{H;d;};x;/skipped/d' | grep '.' +echo "" +echo +} + +merge_junit_results $TEST_DIR/boost_test_results_C*.junit > $TEST_DIR/boost_test_results.junit exit $EXIT_CODE From 6c97572660a37a0919a7672da4c6a090c58861ec Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 4 Jul 2025 09:36:08 +1200 Subject: [PATCH 31/49] Fix failing test cases --- lib/core/unittest/CNamedPipeFactoryTest.cc | 43 +++++++++++++--------- 1 file changed, 25 insertions(+), 18 deletions(-) diff --git a/lib/core/unittest/CNamedPipeFactoryTest.cc b/lib/core/unittest/CNamedPipeFactoryTest.cc index 39aef5e07e..da05bb6341 100644 --- a/lib/core/unittest/CNamedPipeFactoryTest.cc +++ b/lib/core/unittest/CNamedPipeFactoryTest.cc @@ -38,9 +38,9 @@ const std::size_t MAX_ATTEMPTS{100}; const std::size_t TEST_SIZE{10000}; const char TEST_CHAR{'a'}; #ifdef Windows -const char* const TEST_PIPE_NAME{"\\\\.\\pipe\\testpipe"}; +static const std::string TEST_PIPE_NAME{"\\\\.\\pipe\\testpipe"}; #else -const char* const TEST_PIPE_NAME{"testfiles/testpipe"}; +static const std::string TEST_PIPE_NAME{"testfiles/testpipe"}; #endif class CThreadBlockCanceller : public ml::core::CThread { @@ -71,13 +71,14 @@ class CThreadBlockCanceller : public ml::core::CThread { } BOOST_AUTO_TEST_CASE(testServerIsCppReader) { - ml::test::CThreadDataWriter threadWriter{SLEEP_TIME_MS, TEST_PIPE_NAME, + const std::string pipeName=TEST_PIPE_NAME+"testServerIsCppReader"; + ml::test::CThreadDataWriter threadWriter{SLEEP_TIME_MS, pipeName, TEST_CHAR, TEST_SIZE}; BOOST_TEST_REQUIRE(threadWriter.start()); std::atomic_bool dummy{false}; ml::core::CNamedPipeFactory::TIStreamP strm{ - ml::core::CNamedPipeFactory::openPipeStreamRead(TEST_PIPE_NAME, dummy)}; + ml::core::CNamedPipeFactory::openPipeStreamRead(pipeName, dummy)}; BOOST_TEST_REQUIRE(strm); static const std::streamsize BUF_SIZE{512}; @@ -100,13 +101,15 @@ BOOST_AUTO_TEST_CASE(testServerIsCppReader) { } BOOST_AUTO_TEST_CASE(testServerIsCReader) { - ml::test::CThreadDataWriter threadWriter{SLEEP_TIME_MS, TEST_PIPE_NAME, + const std::string pipeName=TEST_PIPE_NAME+"testServerIsCReader"; + + ml::test::CThreadDataWriter threadWriter{SLEEP_TIME_MS, pipeName, TEST_CHAR, TEST_SIZE}; BOOST_TEST_REQUIRE(threadWriter.start()); std::atomic_bool dummy{false}; ml::core::CNamedPipeFactory::TFileP file{ - ml::core::CNamedPipeFactory::openPipeFileRead(TEST_PIPE_NAME, dummy)}; + ml::core::CNamedPipeFactory::openPipeFileRead(pipeName, dummy)}; BOOST_TEST_REQUIRE(file); static const std::size_t BUF_SIZE{512}; @@ -129,12 +132,14 @@ BOOST_AUTO_TEST_CASE(testServerIsCReader) { } BOOST_AUTO_TEST_CASE(testServerIsCppWriter) { - ml::test::CThreadDataReader threadReader{PAUSE_TIME_MS, MAX_ATTEMPTS, TEST_PIPE_NAME}; + const std::string pipeName=TEST_PIPE_NAME+"testServerIsCppWriter"; + + ml::test::CThreadDataReader threadReader{PAUSE_TIME_MS, MAX_ATTEMPTS, pipeName}; BOOST_TEST_REQUIRE(threadReader.start()); std::atomic_bool dummy{false}; ml::core::CNamedPipeFactory::TOStreamP strm{ - ml::core::CNamedPipeFactory::openPipeStreamWrite(TEST_PIPE_NAME, dummy)}; + ml::core::CNamedPipeFactory::openPipeStreamWrite(pipeName, dummy)}; BOOST_TEST_REQUIRE(strm); std::size_t charsLeft{TEST_SIZE}; @@ -159,12 +164,14 @@ BOOST_AUTO_TEST_CASE(testServerIsCppWriter) { } BOOST_AUTO_TEST_CASE(testServerIsCWriter) { - ml::test::CThreadDataReader threadReader{PAUSE_TIME_MS, MAX_ATTEMPTS, TEST_PIPE_NAME}; + const std::string pipeName=TEST_PIPE_NAME+"testServerIsCWriter"; + + ml::test::CThreadDataReader threadReader{PAUSE_TIME_MS, MAX_ATTEMPTS, pipeName}; BOOST_TEST_REQUIRE(threadReader.start()); std::atomic_bool dummy{false}; ml::core::CNamedPipeFactory::TFileP file{ - ml::core::CNamedPipeFactory::openPipeFileWrite(TEST_PIPE_NAME, dummy)}; + ml::core::CNamedPipeFactory::openPipeFileWrite(pipeName, dummy)}; BOOST_TEST_REQUIRE(file); std::size_t charsLeft{TEST_SIZE}; @@ -200,7 +207,7 @@ BOOST_AUTO_TEST_CASE(testCancelBlock) { } BOOST_AUTO_TEST_CASE(testErrorIfRegularFile) { - std::atomic_bool dummy{false}; + const std::atomic_bool dummy{false}; ml::core::CNamedPipeFactory::TIStreamP strm{ ml::core::CNamedPipeFactory::openPipeStreamRead("Main.cc", dummy)}; BOOST_TEST_REQUIRE(strm == nullptr); @@ -215,23 +222,23 @@ BOOST_AUTO_TEST_CASE(testErrorIfSymlink) { // Suppress the error about no assertions in this case BOOST_REQUIRE(BOOST_IS_DEFINED(Windows)); #else - static const char* const TEST_SYMLINK_NAME{"test_symlink"}; + const std::string TEST_SYMLINK_NAME{"test_symlink"}; // Remove any files left behind by a previous failed test, but don't check // the return codes as these calls will usually fail - ::unlink(TEST_SYMLINK_NAME); - ::unlink(TEST_PIPE_NAME); + ::unlink(TEST_SYMLINK_NAME.c_str()); + ::unlink(TEST_PIPE_NAME.c_str()); - BOOST_REQUIRE_EQUAL(0, ::mkfifo(TEST_PIPE_NAME, S_IRUSR | S_IWUSR)); - BOOST_REQUIRE_EQUAL(0, ::symlink(TEST_PIPE_NAME, TEST_SYMLINK_NAME)); + BOOST_REQUIRE_EQUAL(0, ::mkfifo(TEST_PIPE_NAME.c_str(), S_IRUSR | S_IWUSR)); + BOOST_REQUIRE_EQUAL(0, ::symlink(TEST_PIPE_NAME.c_str(), TEST_SYMLINK_NAME.c_str())); std::atomic_bool dummy{false}; ml::core::CNamedPipeFactory::TIStreamP strm{ ml::core::CNamedPipeFactory::openPipeStreamRead(TEST_SYMLINK_NAME, dummy)}; BOOST_TEST_REQUIRE(strm == nullptr); - BOOST_REQUIRE_EQUAL(0, ::unlink(TEST_SYMLINK_NAME)); - BOOST_REQUIRE_EQUAL(0, ::unlink(TEST_PIPE_NAME)); + BOOST_REQUIRE_EQUAL(0, ::unlink(TEST_SYMLINK_NAME.c_str())); + BOOST_REQUIRE_EQUAL(0, ::unlink(TEST_PIPE_NAME.c_str())); #endif } From 733b03871bfb1552c1e16d80964163ec854c00da Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 4 Jul 2025 10:43:19 +1200 Subject: [PATCH 32/49] Formatting --- lib/core/unittest/CNamedPipeFactoryTest.cc | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/lib/core/unittest/CNamedPipeFactoryTest.cc b/lib/core/unittest/CNamedPipeFactoryTest.cc index da05bb6341..ae90cf48d5 100644 --- a/lib/core/unittest/CNamedPipeFactoryTest.cc +++ b/lib/core/unittest/CNamedPipeFactoryTest.cc @@ -38,9 +38,9 @@ const std::size_t MAX_ATTEMPTS{100}; const std::size_t TEST_SIZE{10000}; const char TEST_CHAR{'a'}; #ifdef Windows -static const std::string TEST_PIPE_NAME{"\\\\.\\pipe\\testpipe"}; +const std::string TEST_PIPE_NAME{"\\\\.\\pipe\\testpipe"}; #else -static const std::string TEST_PIPE_NAME{"testfiles/testpipe"}; +const std::string TEST_PIPE_NAME{"testfiles/testpipe"}; #endif class CThreadBlockCanceller : public ml::core::CThread { @@ -71,9 +71,8 @@ class CThreadBlockCanceller : public ml::core::CThread { } BOOST_AUTO_TEST_CASE(testServerIsCppReader) { - const std::string pipeName=TEST_PIPE_NAME+"testServerIsCppReader"; - ml::test::CThreadDataWriter threadWriter{SLEEP_TIME_MS, pipeName, - TEST_CHAR, TEST_SIZE}; + const std::string pipeName = TEST_PIPE_NAME + "testServerIsCppReader"; + ml::test::CThreadDataWriter threadWriter{SLEEP_TIME_MS, pipeName, TEST_CHAR, TEST_SIZE}; BOOST_TEST_REQUIRE(threadWriter.start()); std::atomic_bool dummy{false}; @@ -101,10 +100,9 @@ BOOST_AUTO_TEST_CASE(testServerIsCppReader) { } BOOST_AUTO_TEST_CASE(testServerIsCReader) { - const std::string pipeName=TEST_PIPE_NAME+"testServerIsCReader"; + const std::string pipeName = TEST_PIPE_NAME + "testServerIsCReader"; - ml::test::CThreadDataWriter threadWriter{SLEEP_TIME_MS, pipeName, - TEST_CHAR, TEST_SIZE}; + ml::test::CThreadDataWriter threadWriter{SLEEP_TIME_MS, pipeName, TEST_CHAR, TEST_SIZE}; BOOST_TEST_REQUIRE(threadWriter.start()); std::atomic_bool dummy{false}; @@ -132,7 +130,7 @@ BOOST_AUTO_TEST_CASE(testServerIsCReader) { } BOOST_AUTO_TEST_CASE(testServerIsCppWriter) { - const std::string pipeName=TEST_PIPE_NAME+"testServerIsCppWriter"; + const std::string pipeName = TEST_PIPE_NAME + "testServerIsCppWriter"; ml::test::CThreadDataReader threadReader{PAUSE_TIME_MS, MAX_ATTEMPTS, pipeName}; BOOST_TEST_REQUIRE(threadReader.start()); @@ -164,7 +162,7 @@ BOOST_AUTO_TEST_CASE(testServerIsCppWriter) { } BOOST_AUTO_TEST_CASE(testServerIsCWriter) { - const std::string pipeName=TEST_PIPE_NAME+"testServerIsCWriter"; + const std::string pipeName = TEST_PIPE_NAME + "testServerIsCWriter"; ml::test::CThreadDataReader threadReader{PAUSE_TIME_MS, MAX_ATTEMPTS, pipeName}; BOOST_TEST_REQUIRE(threadReader.start()); From 62dce12f9cbc51917aaf95af9e5db434d5b1c19c Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 4 Jul 2025 12:35:55 +1200 Subject: [PATCH 33/49] Tidy up of scripts --- cmake/test-runner.cmake | 8 ++-- dev-tools/docker_test.sh | 2 +- generate_test_names.py | 77 ------------------------------ merge_results.sh | 41 ---------------- run_tests_as_seperate_processes.sh | 1 - 5 files changed, 5 insertions(+), 124 deletions(-) delete mode 100755 generate_test_names.py delete mode 100755 merge_results.sh diff --git a/cmake/test-runner.cmake b/cmake/test-runner.cmake index 7c13d59f0c..c0acac469a 100644 --- a/cmake/test-runner.cmake +++ b/cmake/test-runner.cmake @@ -35,15 +35,15 @@ else() set(SAFE_TEST_NAME "_${SAFE_TEST_NAME}") endif() - # If env var RUN_BOOST_TESTS_IN_BACKGROUND is defined run the tests in the background - message(STATUS "RUN_BOOST_TESTS_IN_BACKGROUND=$ENV{RUN_BOOST_TESTS_IN_BACKGROUND}") + # If env var RUN_BOOST_TESTS_IN_FOREGROUND is defined run the tests in the foreground + message(STATUS "RUN_BOOST_TESTS_IN_FOREGROUND=$ENV{RUN_BOOST_TESTS_IN_FOREGROUND}") if (DEFINED TEST_FLAGS OR DEFINED TESTS) string(REPLACE "boost_test_results" "boost_test_results${SAFE_TEST_NAME}" BOOST_TEST_OUTPUT_FORMAT_FLAGS "$ENV{BOOST_TEST_OUTPUT_FORMAT_FLAGS}") set(OUTPUT_FILE "${TEST_DIR}/${TEST_NAME}${SAFE_TEST_NAME}.out") set(FAILED_FILE "${TEST_DIR}/${TEST_NAME}${SAFE_TEST_NAME}.failed") - if(DEFINED ENV{RUN_BOOST_TESTS_IN_BACKGROUND}) + if(NOT DEFINED ENV{RUN_BOOST_TESTS_IN_FOREGROUND}) message(STATUS "executing process ${TEST_DIR}/${TEST_NAME} ${TEST_FLAGS} ${TESTS} ${BOOST_TEST_OUTPUT_FORMAT_FLAGS} --no_color_output") execute_process(COMMAND ${TEST_DIR}/${TEST_NAME} ${TEST_FLAGS} ${TESTS} ${BOOST_TEST_OUTPUT_FORMAT_FLAGS} --no_color_output OUTPUT_FILE ${OUTPUT_FILE} ERROR_FILE ${OUTPUT_FILE} RESULT_VARIABLE TEST_SUCCESS) message(STATUS "TESTS EXITED WITH SUCCESS ${TEST_SUCCESS}") @@ -53,7 +53,7 @@ else() message(STATUS "TESTS EXITED WITH SUCCESS ${TEST_SUCCESS}") endif() else() - if(DEFINED ENV{RUN_BOOST_TESTS_IN_BACKGROUND}) + if(NOT DEFINED ENV{RUN_BOOST_TESTS_IN_FOREGROUND}) execute_process(COMMAND ${TEST_DIR}/${TEST_NAME} $ENV{TEST_FLAGS} $ENV{BOOST_TEST_OUTPUT_FORMAT_FLAGS} --no_color_output OUTPUT_FILE ${OUTPUT_FILE} ERROR_FILE ${OUTPUT_FILE} RESULT_VARIABLE TEST_SUCCESS) else() diff --git a/dev-tools/docker_test.sh b/dev-tools/docker_test.sh index 44a972b6c7..aed18fa609 100755 --- a/dev-tools/docker_test.sh +++ b/dev-tools/docker_test.sh @@ -25,7 +25,7 @@ usage() { } PLATFORMS= -EXTRACT_FIND="-name boost_test_results\*.xml -o -name boost_test_results\*.junit" +EXTRACT_FIND="-name boost_test_results.xml -o -name boost_test_results.junit" EXTRACT_EXPLICIT="build/distributions build/test_status.txt" while [ -n "$1" ] diff --git a/generate_test_names.py b/generate_test_names.py deleted file mode 100755 index 805c169d6e..0000000000 --- a/generate_test_names.py +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/env python3 -# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -# or more contributor license agreements. Licensed under the Elastic License -# 2.0 and the following additional limitation. Functionality enabled by the -# files subject to the Elastic License 2.0 may only be used in production when -# invoked by an Elasticsearch process with a license key installed that permits -# use of machine learning features. You may not use this file except in -# compliance with the Elastic License 2.0 and the foregoing additional -# limitation. - -# This script provides a wrapper around a call to a BOOST test executable -# to return a formatted list of tests such that each fully qualified test -# name would be in a form suitable to being passed to BOOST test's "--run_test" -# parameter. -# It takes precisely one positional parameter, the path to a BOOST test executable. - - -import argparse -import re -import subprocess -import sys - - -def parse_arguments(): - parser = argparse.ArgumentParser() - parser.add_argument('exec_path', help='The path to the ml_test suite executable') - return parser.parse_args() - -def get_qualified_test_names(executable_path): - - cmd = [args.exec_path, "--list_content"] - process = subprocess.run(cmd, capture_output=True, text=True, check=True) - output_lines = process.stderr.splitlines() - - test_names = [] - current_suite_stack = [] - - for line in output_lines: - match_suite = re.match(r'^( *)(C.*Test)\*$', line) - match_case = re.match(r'^( *)(test.*)\*$', line) - - if match_suite: - indent_level = len(match_suite.group(1)) - suite_name = match_suite.group(2) - - # Pop suites from stack if current indent is less or equal - while current_suite_stack and len(current_suite_stack[-1][0]) >= indent_level: - current_suite_stack.pop() - - current_suite_stack.append((match_suite.group(1), suite_name)) - elif match_case: - indent_level = len(match_case.group(1)) - case_name = match_case.group(2) - - # Pop suites from stack if current indent is less (for sibling suites/cases) - while current_suite_stack and len(current_suite_stack[-1][0]) >= indent_level: - current_suite_stack.pop() - - full_path = "/".join([s[1] for s in current_suite_stack] + [case_name]) - test_names.append(full_path) - return test_names - -if __name__ == "__main__": - args = parse_arguments() - try: - names = get_qualified_test_names(args.exec_path) - for name in names: - print(name) - except subprocess.CalledProcessError as e: - print(f"Error listing tests: {e.stderr}", file=sys.stderr) - sys.exit(1) - except FileNotFoundError: - print(f"Error: Test executable '{args.exec_path}' not found.", file=sys.stderr) - sys.exit(1) - except Exception as e: - print(f"An unexpected error occurred: {e}", file=sys.stderr) - sys.exit(1) diff --git a/merge_results.sh b/merge_results.sh deleted file mode 100755 index c1d5446091..0000000000 --- a/merge_results.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash -# -# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -# or more contributor license agreements. Licensed under the Elastic License -# 2.0 and the following additional limitation. Functionality enabled by the -# files subject to the Elastic License 2.0 may only be used in production when -# invoked by an Elasticsearch process with a license key installed that permits -# use of machine learning features. You may not use this file except in -# compliance with the Elastic License 2.0 and the foregoing additional -# limitation. -# - -# This script amalgamates multiple JUNIT results files from individual tests -# into one, omitting any test cases that have been skipped. The result is -# output to stdout. - -if [ $# -lt 1 ]; then - echo "Usage: $0 " - exit 1 -fi - -JUNIT_FILES="$@" - -echo "" -cat $JUNIT_FILES | \ - gawk -n ' - BEGIN{tests=0; skipped=0; errors=0; failures=0; id=""; time=0.0; name=""} - { - where=match($0, /"}' - -cat $JUNIT_FILES | sed -e '/xml/d' -e '/testsuite/d' -e '//{H;d;};x;/skipped/d' | grep '.' -echo "" -echo - diff --git a/run_tests_as_seperate_processes.sh b/run_tests_as_seperate_processes.sh index 8011bc0fe7..43daf0819c 100755 --- a/run_tests_as_seperate_processes.sh +++ b/run_tests_as_seperate_processes.sh @@ -91,7 +91,6 @@ if [ -z "$ALL_TEST_NAMES" ]; then fi EXIT_CODE=0 -export RUN_BOOST_TESTS_IN_BACKGROUND=1 function execute_tests() { From 55875c001d293c6325001c0132584911356dca3a Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 4 Jul 2025 14:40:50 +1200 Subject: [PATCH 34/49] Further tidy up of scripts --- .buildkite/scripts/steps/build_and_test.sh | 2 +- cmake/test-runner.cmake | 81 +++++++++------------- run_tests_as_seperate_processes.sh | 4 +- 3 files changed, 38 insertions(+), 49 deletions(-) diff --git a/.buildkite/scripts/steps/build_and_test.sh b/.buildkite/scripts/steps/build_and_test.sh index 5fc133874c..023d4c7488 100755 --- a/.buildkite/scripts/steps/build_and_test.sh +++ b/.buildkite/scripts/steps/build_and_test.sh @@ -106,7 +106,7 @@ fi if [[ -z "$CPP_CROSS_COMPILE" ]] ; then OS=$(uname -s | tr "A-Z" "a-z") TEST_RESULTS_ARCHIVE=${OS}-${HARDWARE_ARCH}-unit_test_results.tgz - find . -path "*/**/ml_test_*.out" -o -path "*/**/*.junit" | xargs tar cvzf ${TEST_RESULTS_ARCHIVE} + find . \( -path "*/**/ml_test_*.out" -o -path "*/**/*.junit" \) -print0 | tar czf ${TEST_RESULTS_ARCHIVE} --null -T - buildkite-agent artifact upload "${TEST_RESULTS_ARCHIVE}" fi diff --git a/cmake/test-runner.cmake b/cmake/test-runner.cmake index c0acac469a..481b2dd092 100644 --- a/cmake/test-runner.cmake +++ b/cmake/test-runner.cmake @@ -14,60 +14,47 @@ execute_process(COMMAND ${CMAKE_COMMAND} -E rm -f ${TEST_DIR}/*.failed) execute_process(COMMAND ${CMAKE_COMMAND} -E rm -f boost_test_results*.xml) execute_process(COMMAND ${CMAKE_COMMAND} -E rm -f boost_test_results*.junit) -set(INDIVIDUAL_TEST "CAnnotationJsonWriterTest/testWrite") - -if(TEST_NAME STREQUAL "ml_test_seccomp") - execute_process(COMMAND ${TEST_DIR}/${TEST_NAME} $ENV{BOOST_TEST_OUTPUT_FORMAT_FLAGS} --logger=HRF,all --report_format=HRF --show_progress=no --no_color_output OUTPUT_FILE ${TEST_DIR}/${TEST_NAME}.out ERROR_FILE ${TEST_DIR}/${TEST_NAME}.out RESULT_VARIABLE TEST_SUCCESS) -else() - # Turn the TEST_FLAGS environment variable into a CMake list variable - if (DEFINED ENV{TEST_FLAGS} AND NOT "$ENV{TEST_FLAGS}" STREQUAL "") - string(REPLACE " " ";" TEST_FLAGS $ENV{TEST_FLAGS}) - endif() - +# Turn the TEST_FLAGS environment variable into a CMake list variable +if (DEFINED ENV{TEST_FLAGS} AND NOT "$ENV{TEST_FLAGS}" STREQUAL "") + string(REPLACE " " ";" TEST_FLAGS $ENV{TEST_FLAGS}) +endif() - set(SAFE_TEST_NAME "") - set(OUTPUT_FILE "${TEST_DIR}/${TEST_NAME}.out") - set(FAILED_FILE "${TEST_DIR}/${TEST_NAME}.failed") - # Special case for specifying a subset of tests to run (can be regex) - if (DEFINED ENV{TESTS} AND NOT "$ENV{TESTS}" STREQUAL "") - set(TESTS "--run_test=$ENV{TESTS}") - string(REGEX REPLACE "[^a-zA-Z0-9_]" "_" SAFE_TEST_NAME "$ENV{TESTS}") - set(SAFE_TEST_NAME "_${SAFE_TEST_NAME}") - endif() +set(SAFE_TEST_NAME "") +set(TESTS "") +# Special case for specifying a subset of tests to run (can be regex) +if (DEFINED ENV{TESTS} AND NOT "$ENV{TESTS}" STREQUAL "") + set(TESTS "--run_test=$ENV{TESTS}") + string(REGEX REPLACE "[^a-zA-Z0-9_]" "_" SAFE_TEST_NAME "$ENV{TESTS}") + set(SAFE_TEST_NAME "_${SAFE_TEST_NAME}") +endif() - # If env var RUN_BOOST_TESTS_IN_FOREGROUND is defined run the tests in the foreground - message(STATUS "RUN_BOOST_TESTS_IN_FOREGROUND=$ENV{RUN_BOOST_TESTS_IN_FOREGROUND}") +message(STATUS "SAFE_TEST_NAME=${SAFE_TEST_NAME}") +string(REPLACE "boost_test_results" "boost_test_results${SAFE_TEST_NAME}" BOOST_TEST_OUTPUT_FORMAT_FLAGS "$ENV{BOOST_TEST_OUTPUT_FORMAT_FLAGS}") +set(OUTPUT_FILE "${TEST_DIR}/${TEST_NAME}${SAFE_TEST_NAME}.out") +set(FAILED_FILE "${TEST_DIR}/${TEST_NAME}${SAFE_TEST_NAME}.failed") - if (DEFINED TEST_FLAGS OR DEFINED TESTS) - string(REPLACE "boost_test_results" "boost_test_results${SAFE_TEST_NAME}" BOOST_TEST_OUTPUT_FORMAT_FLAGS "$ENV{BOOST_TEST_OUTPUT_FORMAT_FLAGS}") - set(OUTPUT_FILE "${TEST_DIR}/${TEST_NAME}${SAFE_TEST_NAME}.out") - set(FAILED_FILE "${TEST_DIR}/${TEST_NAME}${SAFE_TEST_NAME}.failed") +# If env var RUN_BOOST_TESTS_IN_FOREGROUND is defined run the tests in the foreground +message(STATUS "RUN_BOOST_TESTS_IN_FOREGROUND=$ENV{RUN_BOOST_TESTS_IN_FOREGROUND}") - if(NOT DEFINED ENV{RUN_BOOST_TESTS_IN_FOREGROUND}) - message(STATUS "executing process ${TEST_DIR}/${TEST_NAME} ${TEST_FLAGS} ${TESTS} ${BOOST_TEST_OUTPUT_FORMAT_FLAGS} --no_color_output") - execute_process(COMMAND ${TEST_DIR}/${TEST_NAME} ${TEST_FLAGS} ${TESTS} ${BOOST_TEST_OUTPUT_FORMAT_FLAGS} --no_color_output OUTPUT_FILE ${OUTPUT_FILE} ERROR_FILE ${OUTPUT_FILE} RESULT_VARIABLE TEST_SUCCESS) - message(STATUS "TESTS EXITED WITH SUCCESS ${TEST_SUCCESS}") - else() - message(STATUS "executing process ${TEST_DIR}/${TEST_NAME} ${TEST_FLAGS} ${TESTS} ${BOOST_TEST_OUTPUT_FORMAT_FLAGS}") - execute_process(COMMAND ${TEST_DIR}/${TEST_NAME} ${TEST_FLAGS} ${TESTS} ${BOOST_TEST_OUTPUT_FORMAT_FLAGS} RESULT_VARIABLE TEST_SUCCESS) - message(STATUS "TESTS EXITED WITH SUCCESS ${TEST_SUCCESS}") - endif() +if(TEST_NAME STREQUAL "ml_test_seccomp") + execute_process(COMMAND ${TEST_DIR}/${TEST_NAME} ${TEST_FLAGS} ${TESTS} ${BOOST_TEST_OUTPUT_FORMAT_FLAGS} --logger=HRF,all --report_format=HRF --show_progress=no --no_color_output OUTPUT_FILE ${OUTPUT_FILE} ERROR_FILE ${OUTPUT_FILE} RESULT_VARIABLE TEST_SUCCESS) +else() + if(NOT DEFINED ENV{RUN_BOOST_TESTS_IN_FOREGROUND}) + message(STATUS "executing process ${TEST_DIR}/${TEST_NAME} ${TEST_FLAGS} ${TESTS} ${BOOST_TEST_OUTPUT_FORMAT_FLAGS} --no_color_output") + execute_process(COMMAND ${TEST_DIR}/${TEST_NAME} ${TEST_FLAGS} ${TESTS} ${BOOST_TEST_OUTPUT_FORMAT_FLAGS} --no_color_output OUTPUT_FILE ${OUTPUT_FILE} ERROR_FILE ${OUTPUT_FILE} RESULT_VARIABLE TEST_SUCCESS) else() - if(NOT DEFINED ENV{RUN_BOOST_TESTS_IN_FOREGROUND}) - execute_process(COMMAND ${TEST_DIR}/${TEST_NAME} $ENV{TEST_FLAGS} $ENV{BOOST_TEST_OUTPUT_FORMAT_FLAGS} - --no_color_output OUTPUT_FILE ${OUTPUT_FILE} ERROR_FILE ${OUTPUT_FILE} RESULT_VARIABLE TEST_SUCCESS) - else() - execute_process(COMMAND ${TEST_DIR}/${TEST_NAME} ${TEST_FLAGS} ${TESTS} ${BOOST_TEST_OUTPUT_FORMAT_FLAGS} RESULT_VARIABLE TEST_SUCCESS) - endif() + message(STATUS "executing process ${TEST_DIR}/${TEST_NAME} ${TEST_FLAGS} ${TESTS} ${BOOST_TEST_OUTPUT_FORMAT_FLAGS}") + execute_process(COMMAND ${TEST_DIR}/${TEST_NAME} ${TEST_FLAGS} ${TESTS} ${BOOST_TEST_OUTPUT_FORMAT_FLAGS} RESULT_VARIABLE TEST_SUCCESS) endif() +endif() - if (NOT TEST_SUCCESS EQUAL 0) - if (EXISTS ${TEST_DIR}/${TEST_NAME}) - execute_process(COMMAND ${CMAKE_COMMAND} -E cat ${OUTPUT_FILE}) - file(WRITE "${TEST_DIR}/${FAILED_FILE}" "") - endif() - message(FATAL_ERROR "Exiting with status ${TEST_SUCCESS}") - endif() +message(STATUS "TESTS EXITED WITH SUCCESS ${TEST_SUCCESS}") +if (NOT TEST_SUCCESS EQUAL 0) + if (EXISTS ${TEST_DIR}/${TEST_NAME}) + execute_process(COMMAND ${CMAKE_COMMAND} -E cat ${OUTPUT_FILE}) + file(WRITE "${TEST_DIR}/${FAILED_FILE}" "") + endif() + message(FATAL_ERROR "Exiting with status ${TEST_SUCCESS}") endif() diff --git a/run_tests_as_seperate_processes.sh b/run_tests_as_seperate_processes.sh index 43daf0819c..641552f120 100755 --- a/run_tests_as_seperate_processes.sh +++ b/run_tests_as_seperate_processes.sh @@ -164,6 +164,8 @@ echo "" echo } -merge_junit_results $TEST_DIR/boost_test_results_C*.junit > $TEST_DIR/boost_test_results.junit +if [ "$TEST_SUITE" != "test_seccomp" ]; then + merge_junit_results $TEST_DIR/boost_test_results_C*.junit > $TEST_DIR/boost_test_results.junit +fi exit $EXIT_CODE From 9c6735380ba53609eecc1df22adab0af5f80a8e9 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 4 Jul 2025 15:59:08 +1200 Subject: [PATCH 35/49] Tweaks for linux aarch64 --- dev-tools/docker/docker_entrypoint.sh | 2 +- dev-tools/docker_test.sh | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/dev-tools/docker/docker_entrypoint.sh b/dev-tools/docker/docker_entrypoint.sh index 1585356c44..8311efffe4 100755 --- a/dev-tools/docker/docker_entrypoint.sh +++ b/dev-tools/docker/docker_entrypoint.sh @@ -66,6 +66,6 @@ if [ "x$1" = "x--test" ] ; then # failure is the unit tests, and then the detailed test results can be # copied from the image echo passed > build/test_status.txt - cmake --build cmake-build-docker ${CMAKE_VERBOSE} -j $(( `nproc`/2 )) -t test_individually || echo failed > build/test_status.txt + cmake --build cmake-build-docker ${CMAKE_VERBOSE} -j $(nproc) -t test_individually || echo failed > build/test_status.txt fi diff --git a/dev-tools/docker_test.sh b/dev-tools/docker_test.sh index aed18fa609..016795ce75 100755 --- a/dev-tools/docker_test.sh +++ b/dev-tools/docker_test.sh @@ -92,7 +92,10 @@ do # Using tar to copy the build and test artifacts out of the container seems # more reliable than docker cp, and also means the files end up with the # correct uid/gid - docker run --rm --workdir=/ml-cpp $TEMP_TAG bash -c "find . $EXTRACT_FIND | xargs tar cf - $EXTRACT_EXPLICIT && sleep 30" | tar xvf - + docker run --rm --workdir=/ml-cpp $TEMP_TAG bash -c "find . \( $EXTRACT_FIND \) -print0 | tar cf - $EXTRACT_EXPLICIT --null -T - && sleep 60" | tar xf - + if [ $? != 0 ]; then + echo "Copying build and test artifacts from docker container failed" + fi docker rmi --force $TEMP_TAG # The image build is set to return zero (i.e. succeed as far as Docker is # concerned) when the only problem is that the unit tests fail, as this From d265102884d237d9af28a3628dc54e89fcacc6d0 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Mon, 7 Jul 2025 09:49:35 +1200 Subject: [PATCH 36/49] Tweak for linux aarch64 seccomp test --- dev-tools/docker_test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-tools/docker_test.sh b/dev-tools/docker_test.sh index 016795ce75..013a3f3ec0 100755 --- a/dev-tools/docker_test.sh +++ b/dev-tools/docker_test.sh @@ -92,7 +92,7 @@ do # Using tar to copy the build and test artifacts out of the container seems # more reliable than docker cp, and also means the files end up with the # correct uid/gid - docker run --rm --workdir=/ml-cpp $TEMP_TAG bash -c "find . \( $EXTRACT_FIND \) -print0 | tar cf - $EXTRACT_EXPLICIT --null -T - && sleep 60" | tar xf - + docker run --rm --workdir=/ml-cpp $TEMP_TAG bash -c "find . \( $EXTRACT_FIND \) -print0 | tar cf - $EXTRACT_EXPLICIT --null -T - | tar xvf - if [ $? != 0 ]; then echo "Copying build and test artifacts from docker container failed" fi From 2cf57527bc5bfb388560428345c7aaa7f5744ed9 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Mon, 7 Jul 2025 10:30:54 +1200 Subject: [PATCH 37/49] Typo --- dev-tools/docker_test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-tools/docker_test.sh b/dev-tools/docker_test.sh index 013a3f3ec0..22a46f4c9f 100755 --- a/dev-tools/docker_test.sh +++ b/dev-tools/docker_test.sh @@ -92,7 +92,7 @@ do # Using tar to copy the build and test artifacts out of the container seems # more reliable than docker cp, and also means the files end up with the # correct uid/gid - docker run --rm --workdir=/ml-cpp $TEMP_TAG bash -c "find . \( $EXTRACT_FIND \) -print0 | tar cf - $EXTRACT_EXPLICIT --null -T - | tar xvf - + docker run --rm --workdir=/ml-cpp $TEMP_TAG bash -c "find . \( $EXTRACT_FIND \) -print0 | tar cf - $EXTRACT_EXPLICIT --null -T -" | tar xvf - if [ $? != 0 ]; then echo "Copying build and test artifacts from docker container failed" fi From 3ace6f454a724695158307ee05a3baf9f46902d9 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Tue, 8 Jul 2025 13:32:21 +1200 Subject: [PATCH 38/49] Better isolation of tests --- lib/api/unittest/CMultiFileDataAdderTest.cc | 17 ++++++++++- lib/core/unittest/CLoggerTest.cc | 20 +++++++------ lib/core/unittest/CNamedPipeFactoryTest.cc | 21 +++++++------- run_tests_as_seperate_processes.sh | 31 +++++++++++++-------- 4 files changed, 59 insertions(+), 30 deletions(-) diff --git a/lib/api/unittest/CMultiFileDataAdderTest.cc b/lib/api/unittest/CMultiFileDataAdderTest.cc index 96231c5679..cd86625d07 100644 --- a/lib/api/unittest/CMultiFileDataAdderTest.cc +++ b/lib/api/unittest/CMultiFileDataAdderTest.cc @@ -37,6 +37,8 @@ #include #include #include +#include +#include // For random number generation facilities #include #include @@ -112,10 +114,18 @@ void detectorPersistHelper(const std::string& configFileName, std::string origBaseDocId(JOB_ID + '_' + CTestAnomalyJob::STATE_TYPE + '_' + origSnapshotId); + // Create a random number to use to generate a unique file name for each test + // this allows tests to be run successfully in parallel + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> distrib(1, 100); + std::ostringstream oss; + oss << distrib(gen); + std::string temp; TStrVec origFileContents(numOrigDocs); for (size_t index = 0; index < numOrigDocs; ++index) { - std::string expectedOrigFilename(baseOrigOutputFilename); + std::string expectedOrigFilename(baseOrigOutputFilename+"_"+oss.str()+"_"); expectedOrigFilename += "/_index/"; expectedOrigFilename += ml::core::CDataAdder::makeCurrentDocId(origBaseDocId, 1 + index); @@ -247,6 +257,10 @@ BOOST_AUTO_TEST_CASE(testSimpleWrite) { BOOST_REQUIRE_NO_THROW(boost::filesystem::remove_all(workDir)); } +#ifndef Linux // These disabled tests all fail when run as part of a full ml_test_api run on Linux, due to hard memory limits being hit. +// This is due to the ResourceMonitor.totalMemory() returning max_rss on that platform +// which means, as it never decreases for the lifetime of the ml_test_api process, that +// prior test cases can affect latter ones. BOOST_AUTO_TEST_CASE(testDetectorPersistBy) { detectorPersistHelper("testfiles/new_mlfields.json", "testfiles/big_ascending.txt", 0, "%d/%b/%Y:%T %z"); @@ -271,5 +285,6 @@ BOOST_AUTO_TEST_CASE(testDetectorPersistCount) { detectorPersistHelper("testfiles/new_persist_count.json", "testfiles/files_users_programs.csv", 5); } +#endif BOOST_AUTO_TEST_SUITE_END() diff --git a/lib/core/unittest/CLoggerTest.cc b/lib/core/unittest/CLoggerTest.cc index 1abec95da0..d41d4cce3a 100644 --- a/lib/core/unittest/CLoggerTest.cc +++ b/lib/core/unittest/CLoggerTest.cc @@ -49,12 +49,13 @@ class CTestFixture { } }; -std::function makeReader(std::ostringstream& loggedData) { - return [&loggedData] { +std::function makeReader(std::ostringstream& loggedData, const std::string& pipeName) { + return [&loggedData, &pipeName]() { + for (std::size_t attempt = 1; attempt <= 100; ++attempt) { // wait a bit so that pipe has been created std::this_thread::sleep_for(std::chrono::milliseconds(50)); - std::ifstream strm(TEST_PIPE_NAME); + std::ifstream strm(pipeName); if (strm.is_open()) { std::copy(std::istreambuf_iterator(strm), std::istreambuf_iterator(), @@ -62,7 +63,7 @@ std::function makeReader(std::ostringstream& loggedData) { return; } } - BOOST_FAIL("Failed to connect to logging pipe within a reasonable time"); + BOOST_FAIL("Failed to connect to logging pipe " + pipeName + " within a reasonable time"); }; } @@ -204,12 +205,13 @@ BOOST_FIXTURE_TEST_CASE(testNonAsciiJsonLogging, CTestFixture) { "Non-iso8859-15: 编码 test", "surrogate pair: 𐐷 test"}; std::ostringstream loggedData; - std::thread reader(makeReader(loggedData)); + const std::string& pipeName = std::string{TEST_PIPE_NAME}+"_testNonAsciiJsonLogging"; + std::thread reader(makeReader(loggedData, pipeName)); ml::core::CLogger& logger = ml::core::CLogger::instance(); // logger might have been reconfigured in previous tests, so reset and reconfigure it logger.reset(); - logger.reconfigure(TEST_PIPE_NAME, ""); + logger.reconfigure(pipeName, ""); for (const auto& m : messages) { LOG_INFO(<< m); @@ -225,14 +227,16 @@ BOOST_FIXTURE_TEST_CASE(testNonAsciiJsonLogging, CTestFixture) { BOOST_FIXTURE_TEST_CASE(testWarnAndErrorThrottling, CTestFixture) { std::ostringstream loggedData; - std::thread reader{makeReader(loggedData)}; + const std::string& pipeName = std::string{TEST_PIPE_NAME}+"_testWarnAndErrorThrottling"; + + std::thread reader{makeReader(loggedData, pipeName)}; TStrVec messages{"Warn should only be seen once", "Error should only be seen once"}; ml::core::CLogger& logger = ml::core::CLogger::instance(); // logger might have been reconfigured in previous tests, so reset and reconfigure it logger.reset(); - logger.reconfigure(TEST_PIPE_NAME, ""); + logger.reconfigure(pipeName, ""); for (std::size_t i = 0; i < 10; ++i) { LOG_WARN(<< messages[0]); diff --git a/lib/core/unittest/CNamedPipeFactoryTest.cc b/lib/core/unittest/CNamedPipeFactoryTest.cc index ae90cf48d5..e94fcd6893 100644 --- a/lib/core/unittest/CNamedPipeFactoryTest.cc +++ b/lib/core/unittest/CNamedPipeFactoryTest.cc @@ -71,7 +71,7 @@ class CThreadBlockCanceller : public ml::core::CThread { } BOOST_AUTO_TEST_CASE(testServerIsCppReader) { - const std::string pipeName = TEST_PIPE_NAME + "testServerIsCppReader"; + const std::string pipeName = TEST_PIPE_NAME + "_testServerIsCppReader"; ml::test::CThreadDataWriter threadWriter{SLEEP_TIME_MS, pipeName, TEST_CHAR, TEST_SIZE}; BOOST_TEST_REQUIRE(threadWriter.start()); @@ -100,7 +100,7 @@ BOOST_AUTO_TEST_CASE(testServerIsCppReader) { } BOOST_AUTO_TEST_CASE(testServerIsCReader) { - const std::string pipeName = TEST_PIPE_NAME + "testServerIsCReader"; + const std::string pipeName = TEST_PIPE_NAME + "_testServerIsCReader"; ml::test::CThreadDataWriter threadWriter{SLEEP_TIME_MS, pipeName, TEST_CHAR, TEST_SIZE}; BOOST_TEST_REQUIRE(threadWriter.start()); @@ -130,7 +130,7 @@ BOOST_AUTO_TEST_CASE(testServerIsCReader) { } BOOST_AUTO_TEST_CASE(testServerIsCppWriter) { - const std::string pipeName = TEST_PIPE_NAME + "testServerIsCppWriter"; + const std::string pipeName = TEST_PIPE_NAME + "_testServerIsCppWriter"; ml::test::CThreadDataReader threadReader{PAUSE_TIME_MS, MAX_ATTEMPTS, pipeName}; BOOST_TEST_REQUIRE(threadReader.start()); @@ -162,7 +162,7 @@ BOOST_AUTO_TEST_CASE(testServerIsCppWriter) { } BOOST_AUTO_TEST_CASE(testServerIsCWriter) { - const std::string pipeName = TEST_PIPE_NAME + "testServerIsCWriter"; + const std::string pipeName = TEST_PIPE_NAME + "_testServerIsCWriter"; ml::test::CThreadDataReader threadReader{PAUSE_TIME_MS, MAX_ATTEMPTS, pipeName}; BOOST_TEST_REQUIRE(threadReader.start()); @@ -198,7 +198,7 @@ BOOST_AUTO_TEST_CASE(testCancelBlock) { BOOST_TEST_REQUIRE(cancellerThread.start()); ml::core::CNamedPipeFactory::TOStreamP strm{ml::core::CNamedPipeFactory::openPipeStreamWrite( - TEST_PIPE_NAME, cancellerThread.hasCancelledBlockingCall())}; + TEST_PIPE_NAME+"_testCancelBlock", cancellerThread.hasCancelledBlockingCall())}; BOOST_TEST_REQUIRE(strm == nullptr); BOOST_TEST_REQUIRE(cancellerThread.stop()); @@ -220,15 +220,16 @@ BOOST_AUTO_TEST_CASE(testErrorIfSymlink) { // Suppress the error about no assertions in this case BOOST_REQUIRE(BOOST_IS_DEFINED(Windows)); #else - const std::string TEST_SYMLINK_NAME{"test_symlink"}; + const std::string TEST_SYMLINK_NAME{"test_symlink_testErrorIfSymlink"}; + const std::string testPipeName{TEST_PIPE_NAME+"_test_symlink_testErrorIfSymlink"}; // Remove any files left behind by a previous failed test, but don't check // the return codes as these calls will usually fail ::unlink(TEST_SYMLINK_NAME.c_str()); - ::unlink(TEST_PIPE_NAME.c_str()); + ::unlink(testPipeName.c_str()); - BOOST_REQUIRE_EQUAL(0, ::mkfifo(TEST_PIPE_NAME.c_str(), S_IRUSR | S_IWUSR)); - BOOST_REQUIRE_EQUAL(0, ::symlink(TEST_PIPE_NAME.c_str(), TEST_SYMLINK_NAME.c_str())); + BOOST_REQUIRE_EQUAL(0, ::mkfifo(testPipeName.c_str(), S_IRUSR | S_IWUSR)); + BOOST_REQUIRE_EQUAL(0, ::symlink(testPipeName.c_str(), TEST_SYMLINK_NAME.c_str())); std::atomic_bool dummy{false}; ml::core::CNamedPipeFactory::TIStreamP strm{ @@ -236,7 +237,7 @@ BOOST_AUTO_TEST_CASE(testErrorIfSymlink) { BOOST_TEST_REQUIRE(strm == nullptr); BOOST_REQUIRE_EQUAL(0, ::unlink(TEST_SYMLINK_NAME.c_str())); - BOOST_REQUIRE_EQUAL(0, ::unlink(TEST_PIPE_NAME.c_str())); + BOOST_REQUIRE_EQUAL(0, ::unlink(testPipeName.c_str())); #endif } diff --git a/run_tests_as_seperate_processes.sh b/run_tests_as_seperate_processes.sh index 641552f120..888efcc536 100755 --- a/run_tests_as_seperate_processes.sh +++ b/run_tests_as_seperate_processes.sh @@ -48,9 +48,6 @@ TEST_DIR=${CPP_SRC_HOME}/$(echo $BINARY_DIR | sed "s|$BUILD_DIR/test/||") export TEST_EXECUTABLE="$2/ml_$3" export LOG_DIR="$2/test_logs" -MAX_ARGS=2 -MAX_PROCS=4 - if [[ -n "$BOOST_TEST_MAX_ARGS" ]]; then MAX_ARGS=$BOOST_TEST_MAX_ARGS fi @@ -62,6 +59,17 @@ fi rm -rf "$LOG_DIR" mkdir -p "$LOG_DIR" +function num_procs() { + if [ `uname` = "Darwin" ]; then + sysctl -n hw.logicalcpu + else + nproc + fi +} + +MAX_ARGS=1 +MAX_PROCS=$(num_procs) + function get_qualified_test_names() { executable_path=$1 @@ -90,8 +98,6 @@ if [ -z "$ALL_TEST_NAMES" ]; then exit 1 fi -EXIT_CODE=0 - function execute_tests() { if [[ "$BOOST_TEST_MIXED_MODE" == "true" ]]; then @@ -117,7 +123,8 @@ function execute_tests() { echo "Test '$TEST_NAME' PASSED." else echo "Test '$TEST_NAME' FAILED with exit code $TEST_STATUS. Check '$LOG_FILE' for details." - EXIT_CODE=1 # Indicate overall failure if any test fails + echo "touch $SAFE_TEST_LOG_FILENAME.failed" + touch $SAFE_TEST_LOG_FILENAME.failed fi done } @@ -125,12 +132,15 @@ function execute_tests() { export -f execute_tests echo $ALL_TEST_NAMES | xargs -n $MAX_ARGS -P $MAX_PROCS bash -c 'execute_tests "$@"' _ - + echo "--------------------------------------------------" -if [ $EXIT_CODE -eq 0 ]; then - echo "$TEST_SUITE: All individual tests PASSED." -else + +if test -n "$(find . -maxdepth 1 -name '*.failed' -print -quit)" +then echo "$TEST_SUITE: Some individual tests FAILED. Check logs in '$LOG_DIR'." + echo found +else + echo "$TEST_SUITE: All individual tests PASSED." fi function merge_junit_results() { @@ -168,4 +178,3 @@ if [ "$TEST_SUITE" != "test_seccomp" ]; then merge_junit_results $TEST_DIR/boost_test_results_C*.junit > $TEST_DIR/boost_test_results.junit fi -exit $EXIT_CODE From 4014a721397f66e37183f7ee03b878cd8f3b4680 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Tue, 8 Jul 2025 15:02:57 +1200 Subject: [PATCH 39/49] Formatting --- lib/api/unittest/CMultiFileDataAdderTest.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/api/unittest/CMultiFileDataAdderTest.cc b/lib/api/unittest/CMultiFileDataAdderTest.cc index cd86625d07..1d718b9eb2 100644 --- a/lib/api/unittest/CMultiFileDataAdderTest.cc +++ b/lib/api/unittest/CMultiFileDataAdderTest.cc @@ -37,8 +37,8 @@ #include #include #include -#include #include // For random number generation facilities +#include #include #include @@ -125,7 +125,7 @@ void detectorPersistHelper(const std::string& configFileName, std::string temp; TStrVec origFileContents(numOrigDocs); for (size_t index = 0; index < numOrigDocs; ++index) { - std::string expectedOrigFilename(baseOrigOutputFilename+"_"+oss.str()+"_"); + std::string expectedOrigFilename(baseOrigOutputFilename + "_" + oss.str() + "_"); expectedOrigFilename += "/_index/"; expectedOrigFilename += ml::core::CDataAdder::makeCurrentDocId(origBaseDocId, 1 + index); From e3d2f8adcbc431a8603e14a09e562318ff8b5b51 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Tue, 8 Jul 2025 15:07:15 +1200 Subject: [PATCH 40/49] Formatting --- lib/core/unittest/CLoggerTest.cc | 4 ++-- lib/core/unittest/CNamedPipeFactoryTest.cc | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/core/unittest/CLoggerTest.cc b/lib/core/unittest/CLoggerTest.cc index d41d4cce3a..e626fb8fcd 100644 --- a/lib/core/unittest/CLoggerTest.cc +++ b/lib/core/unittest/CLoggerTest.cc @@ -205,7 +205,7 @@ BOOST_FIXTURE_TEST_CASE(testNonAsciiJsonLogging, CTestFixture) { "Non-iso8859-15: 编码 test", "surrogate pair: 𐐷 test"}; std::ostringstream loggedData; - const std::string& pipeName = std::string{TEST_PIPE_NAME}+"_testNonAsciiJsonLogging"; + const std::string& pipeName = std::string{TEST_PIPE_NAME} + "_testNonAsciiJsonLogging"; std::thread reader(makeReader(loggedData, pipeName)); ml::core::CLogger& logger = ml::core::CLogger::instance(); @@ -227,7 +227,7 @@ BOOST_FIXTURE_TEST_CASE(testNonAsciiJsonLogging, CTestFixture) { BOOST_FIXTURE_TEST_CASE(testWarnAndErrorThrottling, CTestFixture) { std::ostringstream loggedData; - const std::string& pipeName = std::string{TEST_PIPE_NAME}+"_testWarnAndErrorThrottling"; + const std::string& pipeName = std::string{TEST_PIPE_NAME} + "_testWarnAndErrorThrottling"; std::thread reader{makeReader(loggedData, pipeName)}; diff --git a/lib/core/unittest/CNamedPipeFactoryTest.cc b/lib/core/unittest/CNamedPipeFactoryTest.cc index e94fcd6893..19dc01323b 100644 --- a/lib/core/unittest/CNamedPipeFactoryTest.cc +++ b/lib/core/unittest/CNamedPipeFactoryTest.cc @@ -198,7 +198,7 @@ BOOST_AUTO_TEST_CASE(testCancelBlock) { BOOST_TEST_REQUIRE(cancellerThread.start()); ml::core::CNamedPipeFactory::TOStreamP strm{ml::core::CNamedPipeFactory::openPipeStreamWrite( - TEST_PIPE_NAME+"_testCancelBlock", cancellerThread.hasCancelledBlockingCall())}; + TEST_PIPE_NAME + "_testCancelBlock", cancellerThread.hasCancelledBlockingCall())}; BOOST_TEST_REQUIRE(strm == nullptr); BOOST_TEST_REQUIRE(cancellerThread.stop()); @@ -221,7 +221,7 @@ BOOST_AUTO_TEST_CASE(testErrorIfSymlink) { BOOST_REQUIRE(BOOST_IS_DEFINED(Windows)); #else const std::string TEST_SYMLINK_NAME{"test_symlink_testErrorIfSymlink"}; - const std::string testPipeName{TEST_PIPE_NAME+"_test_symlink_testErrorIfSymlink"}; + const std::string testPipeName{TEST_PIPE_NAME + "_test_symlink_testErrorIfSymlink"}; // Remove any files left behind by a previous failed test, but don't check // the return codes as these calls will usually fail From 7b398999b76e5ffc59b3e1b8ed93041617543939 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Tue, 8 Jul 2025 16:39:02 +1200 Subject: [PATCH 41/49] Reworking test case --- lib/api/unittest/CMultiFileDataAdderTest.cc | 26 +++++++++------------ 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/lib/api/unittest/CMultiFileDataAdderTest.cc b/lib/api/unittest/CMultiFileDataAdderTest.cc index 1d718b9eb2..f03dd81fd6 100644 --- a/lib/api/unittest/CMultiFileDataAdderTest.cc +++ b/lib/api/unittest/CMultiFileDataAdderTest.cc @@ -102,7 +102,16 @@ void detectorPersistHelper(const std::string& configFileName, // Persist the detector state to file(s) - std::string baseOrigOutputFilename(ml::test::CTestTmpDir::tmpDir() + "/orig"); + // Create a random number to use to generate a unique file name for each test + // this allows tests to be run successfully in parallel + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> distrib(1, 100); + std::ostringstream oss; + oss << distrib(gen); + + std::string baseOrigOutputFilename(ml::test::CTestTmpDir::tmpDir() + + "/orig_" + oss.str()); { // Clean up any leftovers of previous failures boost::filesystem::path origDir(baseOrigOutputFilename); @@ -114,18 +123,10 @@ void detectorPersistHelper(const std::string& configFileName, std::string origBaseDocId(JOB_ID + '_' + CTestAnomalyJob::STATE_TYPE + '_' + origSnapshotId); - // Create a random number to use to generate a unique file name for each test - // this allows tests to be run successfully in parallel - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<> distrib(1, 100); - std::ostringstream oss; - oss << distrib(gen); - std::string temp; TStrVec origFileContents(numOrigDocs); for (size_t index = 0; index < numOrigDocs; ++index) { - std::string expectedOrigFilename(baseOrigOutputFilename + "_" + oss.str() + "_"); + std::string expectedOrigFilename(baseOrigOutputFilename); expectedOrigFilename += "/_index/"; expectedOrigFilename += ml::core::CDataAdder::makeCurrentDocId(origBaseDocId, 1 + index); @@ -257,10 +258,6 @@ BOOST_AUTO_TEST_CASE(testSimpleWrite) { BOOST_REQUIRE_NO_THROW(boost::filesystem::remove_all(workDir)); } -#ifndef Linux // These disabled tests all fail when run as part of a full ml_test_api run on Linux, due to hard memory limits being hit. -// This is due to the ResourceMonitor.totalMemory() returning max_rss on that platform -// which means, as it never decreases for the lifetime of the ml_test_api process, that -// prior test cases can affect latter ones. BOOST_AUTO_TEST_CASE(testDetectorPersistBy) { detectorPersistHelper("testfiles/new_mlfields.json", "testfiles/big_ascending.txt", 0, "%d/%b/%Y:%T %z"); @@ -285,6 +282,5 @@ BOOST_AUTO_TEST_CASE(testDetectorPersistCount) { detectorPersistHelper("testfiles/new_persist_count.json", "testfiles/files_users_programs.csv", 5); } -#endif BOOST_AUTO_TEST_SUITE_END() From 58fafb40bca5cfb7f4e0767861a8a11f1fa1d0da Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Wed, 9 Jul 2025 15:45:14 +1200 Subject: [PATCH 42/49] Small tidy up --- cmake/test-runner.cmake | 10 +--------- run_tests_as_seperate_processes.sh | 23 +++++++++++------------ 2 files changed, 12 insertions(+), 21 deletions(-) diff --git a/cmake/test-runner.cmake b/cmake/test-runner.cmake index 481b2dd092..b17d85ba6f 100644 --- a/cmake/test-runner.cmake +++ b/cmake/test-runner.cmake @@ -28,33 +28,25 @@ if (DEFINED ENV{TESTS} AND NOT "$ENV{TESTS}" STREQUAL "") set(SAFE_TEST_NAME "_${SAFE_TEST_NAME}") endif() -message(STATUS "SAFE_TEST_NAME=${SAFE_TEST_NAME}") string(REPLACE "boost_test_results" "boost_test_results${SAFE_TEST_NAME}" BOOST_TEST_OUTPUT_FORMAT_FLAGS "$ENV{BOOST_TEST_OUTPUT_FORMAT_FLAGS}") set(OUTPUT_FILE "${TEST_DIR}/${TEST_NAME}${SAFE_TEST_NAME}.out") set(FAILED_FILE "${TEST_DIR}/${TEST_NAME}${SAFE_TEST_NAME}.failed") # If env var RUN_BOOST_TESTS_IN_FOREGROUND is defined run the tests in the foreground -message(STATUS "RUN_BOOST_TESTS_IN_FOREGROUND=$ENV{RUN_BOOST_TESTS_IN_FOREGROUND}") - if(TEST_NAME STREQUAL "ml_test_seccomp") execute_process(COMMAND ${TEST_DIR}/${TEST_NAME} ${TEST_FLAGS} ${TESTS} ${BOOST_TEST_OUTPUT_FORMAT_FLAGS} --logger=HRF,all --report_format=HRF --show_progress=no --no_color_output OUTPUT_FILE ${OUTPUT_FILE} ERROR_FILE ${OUTPUT_FILE} RESULT_VARIABLE TEST_SUCCESS) else() if(NOT DEFINED ENV{RUN_BOOST_TESTS_IN_FOREGROUND}) - message(STATUS "executing process ${TEST_DIR}/${TEST_NAME} ${TEST_FLAGS} ${TESTS} ${BOOST_TEST_OUTPUT_FORMAT_FLAGS} --no_color_output") execute_process(COMMAND ${TEST_DIR}/${TEST_NAME} ${TEST_FLAGS} ${TESTS} ${BOOST_TEST_OUTPUT_FORMAT_FLAGS} --no_color_output OUTPUT_FILE ${OUTPUT_FILE} ERROR_FILE ${OUTPUT_FILE} RESULT_VARIABLE TEST_SUCCESS) else() - message(STATUS "executing process ${TEST_DIR}/${TEST_NAME} ${TEST_FLAGS} ${TESTS} ${BOOST_TEST_OUTPUT_FORMAT_FLAGS}") execute_process(COMMAND ${TEST_DIR}/${TEST_NAME} ${TEST_FLAGS} ${TESTS} ${BOOST_TEST_OUTPUT_FORMAT_FLAGS} RESULT_VARIABLE TEST_SUCCESS) endif() endif() -message(STATUS "TESTS EXITED WITH SUCCESS ${TEST_SUCCESS}") - if (NOT TEST_SUCCESS EQUAL 0) if (EXISTS ${TEST_DIR}/${TEST_NAME}) execute_process(COMMAND ${CMAKE_COMMAND} -E cat ${OUTPUT_FILE}) - file(WRITE "${TEST_DIR}/${FAILED_FILE}" "") + file(WRITE "${FAILED_FILE}" "") endif() - message(FATAL_ERROR "Exiting with status ${TEST_SUCCESS}") endif() diff --git a/run_tests_as_seperate_processes.sh b/run_tests_as_seperate_processes.sh index 888efcc536..a59149a600 100755 --- a/run_tests_as_seperate_processes.sh +++ b/run_tests_as_seperate_processes.sh @@ -48,17 +48,6 @@ TEST_DIR=${CPP_SRC_HOME}/$(echo $BINARY_DIR | sed "s|$BUILD_DIR/test/||") export TEST_EXECUTABLE="$2/ml_$3" export LOG_DIR="$2/test_logs" -if [[ -n "$BOOST_TEST_MAX_ARGS" ]]; then - MAX_ARGS=$BOOST_TEST_MAX_ARGS -fi - -if [[ -n "$BOOST_TEST_MAX_PROCS" ]]; then - MAX_PROCS=$BOOST_TEST_MAX_PROCS -fi - -rm -rf "$LOG_DIR" -mkdir -p "$LOG_DIR" - function num_procs() { if [ `uname` = "Darwin" ]; then sysctl -n hw.logicalcpu @@ -70,6 +59,17 @@ function num_procs() { MAX_ARGS=1 MAX_PROCS=$(num_procs) +if [[ -n "$BOOST_TEST_MAX_ARGS" ]]; then + MAX_ARGS=$BOOST_TEST_MAX_ARGS +fi + +if [[ -n "$BOOST_TEST_MAX_PROCS" ]]; then + MAX_PROCS=$BOOST_TEST_MAX_PROCS +fi + +rm -rf "$LOG_DIR" +mkdir -p "$LOG_DIR" + function get_qualified_test_names() { executable_path=$1 @@ -123,7 +123,6 @@ function execute_tests() { echo "Test '$TEST_NAME' PASSED." else echo "Test '$TEST_NAME' FAILED with exit code $TEST_STATUS. Check '$LOG_FILE' for details." - echo "touch $SAFE_TEST_LOG_FILENAME.failed" touch $SAFE_TEST_LOG_FILENAME.failed fi done From 033adef3829f672d826466d9202fe290ea9db92e Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 11 Jul 2025 16:41:47 +1200 Subject: [PATCH 43/49] Minor tweaks to test runner script --- run_tests_as_seperate_processes.sh | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/run_tests_as_seperate_processes.sh b/run_tests_as_seperate_processes.sh index a59149a600..deb60a4298 100755 --- a/run_tests_as_seperate_processes.sh +++ b/run_tests_as_seperate_processes.sh @@ -39,11 +39,11 @@ if [ $# -lt 3 ]; then exit fi -export BUILD_DIR=$1 -export BINARY_DIR=$2 +export BUILD_DIR=$( echo $1 | sed 's|/$||' ) +export BINARY_DIR=$( echo $2 | sed 's|/$||' ) export TEST_SUITE=$3 -TEST_DIR=${CPP_SRC_HOME}/$(echo $BINARY_DIR | sed "s|$BUILD_DIR/test/||") +TEST_DIR=${CPP_SRC_HOME}/$(echo $BINARY_DIR | sed -e "s|$BUILD_DIR/test/||" -e 's|unittest.*|unittest|') export TEST_EXECUTABLE="$2/ml_$3" export LOG_DIR="$2/test_logs" @@ -123,18 +123,18 @@ function execute_tests() { echo "Test '$TEST_NAME' PASSED." else echo "Test '$TEST_NAME' FAILED with exit code $TEST_STATUS. Check '$LOG_FILE' for details." - touch $SAFE_TEST_LOG_FILENAME.failed fi done } export -f execute_tests -echo $ALL_TEST_NAMES | xargs -n $MAX_ARGS -P $MAX_PROCS bash -c 'execute_tests "$@"' _ +RESULTS=$(echo $ALL_TEST_NAMES | xargs -n $MAX_ARGS -P $MAX_PROCS bash -c 'execute_tests "$@"' _) echo "--------------------------------------------------" -if test -n "$(find . -maxdepth 1 -name '*.failed' -print -quit)" +grep 'FAILED with exit code' <<< $RESULT +if [ $? -eq 0 ] then echo "$TEST_SUITE: Some individual tests FAILED. Check logs in '$LOG_DIR'." echo found @@ -173,7 +173,7 @@ echo "" echo } -if [ "$TEST_SUITE" != "test_seccomp" ]; then +if [[ $BOOST_TEST_OUTPUT_FORMAT_FLAGS =~ junit ]]; then merge_junit_results $TEST_DIR/boost_test_results_C*.junit > $TEST_DIR/boost_test_results.junit fi From fe71d2abb1617462056792fa9b5afc91945e8e74 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Mon, 14 Jul 2025 09:56:33 +1200 Subject: [PATCH 44/49] typo --- run_tests_as_seperate_processes.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/run_tests_as_seperate_processes.sh b/run_tests_as_seperate_processes.sh index deb60a4298..4208be8fd4 100755 --- a/run_tests_as_seperate_processes.sh +++ b/run_tests_as_seperate_processes.sh @@ -129,7 +129,7 @@ function execute_tests() { export -f execute_tests -RESULTS=$(echo $ALL_TEST_NAMES | xargs -n $MAX_ARGS -P $MAX_PROCS bash -c 'execute_tests "$@"' _) +RESULT=$(echo $ALL_TEST_NAMES | xargs -n $MAX_ARGS -P $MAX_PROCS bash -c 'execute_tests "$@"' _) echo "--------------------------------------------------" From bbe25b13804fa9d1c24faa2a4d56fc44428595b5 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 25 Jul 2025 14:54:34 +1200 Subject: [PATCH 45/49] Set both adjusted usage and adjusted peak usage to system memory. On Linux both adjusted usage and adjusted peak usage are set to system memory usage (max resident set size) These are the values reported back to the Java process, they are not used for any other purpose. --- lib/model/CResourceMonitor.cc | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/lib/model/CResourceMonitor.cc b/lib/model/CResourceMonitor.cc index 83ec624b39..18e150d7aa 100644 --- a/lib/model/CResourceMonitor.cc +++ b/lib/model/CResourceMonitor.cc @@ -380,10 +380,12 @@ CResourceMonitor::SModelSizeStats CResourceMonitor::createMemoryUsageReport(core_t::TTime bucketStartTime) { SModelSizeStats res; res.s_Usage = this->totalMemory(); - res.s_AdjustedUsage = this->adjustedUsage(res.s_Usage); res.s_PeakUsage = static_cast( - core::CProgramCounters::counter(counter_t::E_TSADPeakMemoryUsage)); - res.s_AdjustedPeakUsage = this->adjustedUsage(res.s_PeakUsage); + core::CProgramCounters::counter(counter_t::E_TSADPeakMemoryUsage)); + // On Linux both adjusted usage and adjusted peak usage are set to system memory usage (max resident set size) + // These are the values reported back to the Java process, they are not used for any other purpose. + res.s_AdjustedUsage = this->applyMemoryStrategy(this->adjustedUsage(res.s_Usage)); + res.s_AdjustedPeakUsage = this->applyMemoryStrategy(this->adjustedUsage(res.s_PeakUsage)); res.s_BytesMemoryLimit = this->getBytesMemoryLimit(); res.s_BytesExceeded = m_CurrentBytesExceeded; res.s_MemoryStatus = m_MemoryStatus; @@ -504,9 +506,9 @@ std::size_t CResourceMonitor::lowLimit() const { } std::size_t CResourceMonitor::totalMemory() const { - return this->applyMemoryStrategy(m_MonitoredResourceCurrentMemory + m_ExtraMemory + - static_cast(core::CProgramCounters::counter( - counter_t::E_TSADOutputMemoryAllocatorUsage))); + return m_MonitoredResourceCurrentMemory + m_ExtraMemory + + static_cast(core::CProgramCounters::counter( + counter_t::E_TSADOutputMemoryAllocatorUsage)); } std::size_t CResourceMonitor::systemMemory() { From 3ab09bafd4c3edfec5d21c8b263a6f45c69ef592 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 25 Jul 2025 15:17:55 +1200 Subject: [PATCH 46/49] Formatting --- lib/model/CResourceMonitor.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/model/CResourceMonitor.cc b/lib/model/CResourceMonitor.cc index 18e150d7aa..f7131daba9 100644 --- a/lib/model/CResourceMonitor.cc +++ b/lib/model/CResourceMonitor.cc @@ -381,7 +381,7 @@ CResourceMonitor::createMemoryUsageReport(core_t::TTime bucketStartTime) { SModelSizeStats res; res.s_Usage = this->totalMemory(); res.s_PeakUsage = static_cast( - core::CProgramCounters::counter(counter_t::E_TSADPeakMemoryUsage)); + core::CProgramCounters::counter(counter_t::E_TSADPeakMemoryUsage)); // On Linux both adjusted usage and adjusted peak usage are set to system memory usage (max resident set size) // These are the values reported back to the Java process, they are not used for any other purpose. res.s_AdjustedUsage = this->applyMemoryStrategy(this->adjustedUsage(res.s_Usage)); From 39b4f3b3afcbbec2552c608fba64b85f28f1f8b5 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Mon, 28 Jul 2025 11:24:24 +1200 Subject: [PATCH 47/49] Revert "Merge branch 'run_tests_individually' of github.com:edsavage/ml-cpp into ad_real_mem_usage" This reverts commit 13fa5cfbc179bd13a33119c50e59eb3d8faf3595, reversing changes made to e8c00e736ffa705de2e8c4e7a6123fcb863bd227. --- .buildkite/pipelines/build_linux.json.py | 1 + .buildkite/scripts/steps/build_and_test.sh | 2 +- 3rd_party/3rd_party.cmake | 19 +-- 3rd_party/CMakeLists.txt | 2 +- cmake/compiler/clang.cmake | 2 +- cmake/functions.cmake | 9 - cmake/test-runner.cmake | 52 +++--- dev-tools/docker/docker_entrypoint.sh | 2 +- dev-tools/docker_test.sh | 5 +- lib/api/unittest/CMultiFileDataAdderTest.cc | 13 +- lib/core/unittest/CLoggerTest.cc | 20 +-- lib/core/unittest/CNamedPipeFactoryTest.cc | 48 +++--- run_tests_as_seperate_processes.sh | 179 -------------------- test/CMakeLists.txt | 9 - 14 files changed, 60 insertions(+), 303 deletions(-) delete mode 100755 run_tests_as_seperate_processes.sh diff --git a/.buildkite/pipelines/build_linux.json.py b/.buildkite/pipelines/build_linux.json.py index efe313b155..56c6002aaf 100755 --- a/.buildkite/pipelines/build_linux.json.py +++ b/.buildkite/pipelines/build_linux.json.py @@ -72,6 +72,7 @@ def main(args): "RUN_TESTS": "true", "BOOST_TEST_OUTPUT_FORMAT_FLAGS": "--logger=JUNIT,error,boost_test_results.junit", }, + "artifact_paths": "*/**/unittest/boost_test_results.junit", "plugins": { "test-collector#v1.2.0": { "files": "*/*/unittest/boost_test_results.junit", diff --git a/.buildkite/scripts/steps/build_and_test.sh b/.buildkite/scripts/steps/build_and_test.sh index 023d4c7488..5fc133874c 100755 --- a/.buildkite/scripts/steps/build_and_test.sh +++ b/.buildkite/scripts/steps/build_and_test.sh @@ -106,7 +106,7 @@ fi if [[ -z "$CPP_CROSS_COMPILE" ]] ; then OS=$(uname -s | tr "A-Z" "a-z") TEST_RESULTS_ARCHIVE=${OS}-${HARDWARE_ARCH}-unit_test_results.tgz - find . \( -path "*/**/ml_test_*.out" -o -path "*/**/*.junit" \) -print0 | tar czf ${TEST_RESULTS_ARCHIVE} --null -T - + find . -path "*/**/ml_test_*.out" -o -path "*/**/*.junit" | xargs tar cvzf ${TEST_RESULTS_ARCHIVE} buildkite-agent artifact upload "${TEST_RESULTS_ARCHIVE}" fi diff --git a/3rd_party/3rd_party.cmake b/3rd_party/3rd_party.cmake index 5878d7f86b..2c9d796221 100644 --- a/3rd_party/3rd_party.cmake +++ b/3rd_party/3rd_party.cmake @@ -25,10 +25,6 @@ if(NOT INSTALL_DIR) message(FATAL_ERROR "INSTALL_DIR not specified") endif() -STRING(REPLACE "//" "/" INSTALL_DIR ${INSTALL_DIR}) - -message(STATUS "3rd_party: CMAKE_CXX_COMPILER_VERSION_MAJOR=${CMAKE_CXX_COMPILER_VERSION_MAJOR}") - string(TOLOWER ${CMAKE_HOST_SYSTEM_NAME} HOST_SYSTEM_NAME) message(STATUS "3rd_party: HOST_SYSTEM_NAME=${HOST_SYSTEM_NAME}") @@ -47,9 +43,7 @@ set(ARCH ${HOST_SYSTEM_PROCESSOR}) if ("${HOST_SYSTEM_NAME}" STREQUAL "darwin") message(STATUS "3rd_party: Copying macOS 3rd party libraries") set(BOOST_LOCATION "/usr/local/lib") - set(BOOST_COMPILER "clang-darwin${CMAKE_CXX_COMPILER_VERSION_MAJOR}") - message(STATUS "3rd_party: BOOST_COMPILER=${BOOST_COMPILER}") - + set(BOOST_COMPILER "clang") if( "${ARCH}" STREQUAL "x86_64" ) set(BOOST_ARCH "x64") else() @@ -69,7 +63,7 @@ elseif ("${HOST_SYSTEM_NAME}" STREQUAL "linux") if(NOT DEFINED ENV{CPP_CROSS_COMPILE} OR "$ENV{CPP_CROSS_COMPILE}" STREQUAL "") message(STATUS "3rd_party: NOT cross compiling. Copying Linux 3rd party libraries") set(BOOST_LOCATION "/usr/local/gcc133/lib") - set(BOOST_COMPILER "gcc${CMAKE_CXX_COMPILER_VERSION_MAJOR}") + set(BOOST_COMPILER "gcc") if( "${ARCH}" STREQUAL "aarch64" ) set(BOOST_ARCH "a64") else() @@ -99,7 +93,7 @@ elseif ("${HOST_SYSTEM_NAME}" STREQUAL "linux") message(STATUS "3rd_party: Cross compile for macosx: Copying macOS 3rd party libraries") set(SYSROOT "/usr/local/sysroot-x86_64-apple-macosx10.14") set(BOOST_LOCATION "${SYSROOT}/usr/local/lib") - set(BOOST_COMPILER "clang-darwin${CMAKE_CXX_COMPILER_VERSION_MAJOR}") + set(BOOST_COMPILER "clang") set(BOOST_EXTENSION "mt-x64-1_86.dylib") set(BOOST_LIBRARIES "atomic" "chrono" "date_time" "filesystem" "iostreams" "log" "log_setup" "program_options" "regex" "system" "thread" "unit_test_framework") set(XML_LOCATION) @@ -114,7 +108,7 @@ elseif ("${HOST_SYSTEM_NAME}" STREQUAL "linux") message(STATUS "3rd_party: Cross compile for linux-aarch64: Copying Linux 3rd party libraries") set(SYSROOT "/usr/local/sysroot-$ENV{CPP_CROSS_COMPILE}-linux-gnu") set(BOOST_LOCATION "${SYSROOT}/usr/local/gcc133/lib") - set(BOOST_COMPILER "gcc${CMAKE_CXX_COMPILER_VERSION_MAJOR}") + set(BOOST_COMPILER "gcc") if("$ENV{CPP_CROSS_COMPILE}" STREQUAL "aarch64") set(BOOST_ARCH "a64") else() @@ -194,9 +188,6 @@ function(install_libs _target _source_dir _prefix _postfix) set(LIBRARIES ${ARGN}) - message(STATUS "_target=${_target} _source_dir=${_source_dir} _prefix=${_prefix} _postfix=${_postfix} LIBRARIES=${LIBRARIES}") - - file(GLOB _LIBS ${_source_dir}/*${_prefix}*${_postfix}) if(_LIBS) @@ -228,7 +219,7 @@ function(install_libs _target _source_dir _prefix _postfix) endif() file(CHMOD ${INSTALL_DIR}/${_LIB} PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) else() - file(COPY ${_RESOLVED_PATH} DESTINATION "${INSTALL_DIR}") + file(COPY ${_RESOLVED_PATH} DESTINATION ${INSTALL_DIR}) file(CHMOD ${INSTALL_DIR}/${_RESOLVED_LIB} PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE) endif() endforeach() diff --git a/3rd_party/CMakeLists.txt b/3rd_party/CMakeLists.txt index f2b092f913..ffdb8e093e 100644 --- a/3rd_party/CMakeLists.txt +++ b/3rd_party/CMakeLists.txt @@ -22,7 +22,7 @@ add_custom_target(licenses ALL # as part of the CMake configuration step - avoiding # the need for it to be done on every build execute_process( - COMMAND ${CMAKE_COMMAND} -DINSTALL_DIR=${INSTALL_DIR} -DCMAKE_CXX_COMPILER_VERSION_MAJOR=${CMAKE_CXX_COMPILER_VERSION_MAJOR} -P ./3rd_party.cmake + COMMAND ${CMAKE_COMMAND} -DINSTALL_DIR=${INSTALL_DIR} -P ./3rd_party.cmake WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} ) diff --git a/cmake/compiler/clang.cmake b/cmake/compiler/clang.cmake index cc4042dbc7..1749ad0a89 100644 --- a/cmake/compiler/clang.cmake +++ b/cmake/compiler/clang.cmake @@ -16,7 +16,7 @@ set(CMAKE_RANLIB "ranlib") set(CMAKE_STRIP "strip") -list(APPEND ML_C_FLAGS +list(APPEND ML_C_FLAGS ${CROSS_FLAGS} ${ARCHCFLAGS} "-fstack-protector" diff --git a/cmake/functions.cmake b/cmake/functions.cmake index ea8070a712..c39a86089a 100644 --- a/cmake/functions.cmake +++ b/cmake/functions.cmake @@ -392,13 +392,6 @@ function(ml_add_test_executable _target) COMMENT "Running test: ml_test_${_target}" WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} ) - - add_custom_target(test_${_target}_individually - DEPENDS ml_test_${_target} - COMMAND ${CMAKE_SOURCE_DIR}/run_tests_as_seperate_processes.sh ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR} test_${_target} - COMMENT "Running test: ml_test_${_target}_individually" - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - ) endif() endfunction() @@ -427,10 +420,8 @@ function(ml_add_test _directory _target) add_subdirectory(../${_directory} ${_directory}) list(APPEND ML_BUILD_TEST_DEPENDS ml_test_${_target}) list(APPEND ML_TEST_DEPENDS test_${_target}) - list(APPEND ML_TEST_INDIVIDUALLY_DEPENDS test_${_target}_individually) set(ML_BUILD_TEST_DEPENDS ${ML_BUILD_TEST_DEPENDS} PARENT_SCOPE) set(ML_TEST_DEPENDS ${ML_TEST_DEPENDS} PARENT_SCOPE) - set(ML_TEST_INDIVIDUALLY_DEPENDS ${ML_TEST_INDIVIDUALLY_DEPENDS} PARENT_SCOPE) endfunction() diff --git a/cmake/test-runner.cmake b/cmake/test-runner.cmake index b17d85ba6f..2bba0cb5c4 100644 --- a/cmake/test-runner.cmake +++ b/cmake/test-runner.cmake @@ -9,44 +9,30 @@ # limitation. # -execute_process(COMMAND ${CMAKE_COMMAND} -E rm -f ${TEST_DIR}/*.out) -execute_process(COMMAND ${CMAKE_COMMAND} -E rm -f ${TEST_DIR}/*.failed) -execute_process(COMMAND ${CMAKE_COMMAND} -E rm -f boost_test_results*.xml) -execute_process(COMMAND ${CMAKE_COMMAND} -E rm -f boost_test_results*.junit) - -# Turn the TEST_FLAGS environment variable into a CMake list variable -if (DEFINED ENV{TEST_FLAGS} AND NOT "$ENV{TEST_FLAGS}" STREQUAL "") - string(REPLACE " " ";" TEST_FLAGS $ENV{TEST_FLAGS}) -endif() - -set(SAFE_TEST_NAME "") -set(TESTS "") -# Special case for specifying a subset of tests to run (can be regex) -if (DEFINED ENV{TESTS} AND NOT "$ENV{TESTS}" STREQUAL "") - set(TESTS "--run_test=$ENV{TESTS}") - string(REGEX REPLACE "[^a-zA-Z0-9_]" "_" SAFE_TEST_NAME "$ENV{TESTS}") - set(SAFE_TEST_NAME "_${SAFE_TEST_NAME}") -endif() - -string(REPLACE "boost_test_results" "boost_test_results${SAFE_TEST_NAME}" BOOST_TEST_OUTPUT_FORMAT_FLAGS "$ENV{BOOST_TEST_OUTPUT_FORMAT_FLAGS}") -set(OUTPUT_FILE "${TEST_DIR}/${TEST_NAME}${SAFE_TEST_NAME}.out") -set(FAILED_FILE "${TEST_DIR}/${TEST_NAME}${SAFE_TEST_NAME}.failed") - -# If env var RUN_BOOST_TESTS_IN_FOREGROUND is defined run the tests in the foreground if(TEST_NAME STREQUAL "ml_test_seccomp") - execute_process(COMMAND ${TEST_DIR}/${TEST_NAME} ${TEST_FLAGS} ${TESTS} ${BOOST_TEST_OUTPUT_FORMAT_FLAGS} --logger=HRF,all --report_format=HRF --show_progress=no --no_color_output OUTPUT_FILE ${OUTPUT_FILE} ERROR_FILE ${OUTPUT_FILE} RESULT_VARIABLE TEST_SUCCESS) + execute_process(COMMAND ${TEST_DIR}/${TEST_NAME} $ENV{BOOST_TEST_OUTPUT_FORMAT_FLAGS} --logger=HRF,all --report_format=HRF --show_progress=no --no_color_output OUTPUT_FILE ${TEST_DIR}/${TEST_NAME}.out ERROR_FILE ${TEST_DIR}/${TEST_NAME}.out RESULT_VARIABLE TEST_SUCCESS) else() - if(NOT DEFINED ENV{RUN_BOOST_TESTS_IN_FOREGROUND}) - execute_process(COMMAND ${TEST_DIR}/${TEST_NAME} ${TEST_FLAGS} ${TESTS} ${BOOST_TEST_OUTPUT_FORMAT_FLAGS} --no_color_output OUTPUT_FILE ${OUTPUT_FILE} ERROR_FILE ${OUTPUT_FILE} RESULT_VARIABLE TEST_SUCCESS) + # Turn the TEST_FLAGS environment variable into a CMake list variable + if (DEFINED ENV{TEST_FLAGS} AND NOT "$ENV{TEST_FLAGS}" STREQUAL "") + string(REPLACE " " ";" TEST_FLAGS $ENV{TEST_FLAGS}) + endif() + + # Special case for specifying a subset of tests to run (can be regex) + if (DEFINED ENV{TESTS} AND NOT "$ENV{TESTS}" STREQUAL "") + set(TESTS "--run_test=$ENV{TESTS}") + endif() + + # If any special command line args are present run the tests in the foreground + if (DEFINED TEST_FLAGS OR DEFINED TESTS) + message(STATUS "executing process ${TEST_DIR}/${TEST_NAME} ${TEST_FLAGS} ${TESTS} $ENV{BOOST_TEST_OUTPUT_FORMAT_FLAGS}") + execute_process(COMMAND ${TEST_DIR}/${TEST_NAME} ${TEST_FLAGS} ${TESTS} $ENV{BOOST_TEST_OUTPUT_FORMAT_FLAGS} RESULT_VARIABLE TEST_SUCCESS) else() - execute_process(COMMAND ${TEST_DIR}/${TEST_NAME} ${TEST_FLAGS} ${TESTS} ${BOOST_TEST_OUTPUT_FORMAT_FLAGS} RESULT_VARIABLE TEST_SUCCESS) + execute_process(COMMAND ${TEST_DIR}/${TEST_NAME} $ENV{TEST_FLAGS} $ENV{BOOST_TEST_OUTPUT_FORMAT_FLAGS} + --no_color_output OUTPUT_FILE ${TEST_DIR}/${TEST_NAME}.out ERROR_FILE ${TEST_DIR}/${TEST_NAME}.out RESULT_VARIABLE TEST_SUCCESS) endif() endif() if (NOT TEST_SUCCESS EQUAL 0) - if (EXISTS ${TEST_DIR}/${TEST_NAME}) - execute_process(COMMAND ${CMAKE_COMMAND} -E cat ${OUTPUT_FILE}) - file(WRITE "${FAILED_FILE}" "") - endif() + execute_process(COMMAND ${CMAKE_COMMAND} -E cat ${TEST_DIR}/${TEST_NAME}.out) + file(WRITE "${TEST_DIR}/${TEST_NAME}.failed" "") endif() - diff --git a/dev-tools/docker/docker_entrypoint.sh b/dev-tools/docker/docker_entrypoint.sh index 8311efffe4..475c05344f 100755 --- a/dev-tools/docker/docker_entrypoint.sh +++ b/dev-tools/docker/docker_entrypoint.sh @@ -66,6 +66,6 @@ if [ "x$1" = "x--test" ] ; then # failure is the unit tests, and then the detailed test results can be # copied from the image echo passed > build/test_status.txt - cmake --build cmake-build-docker ${CMAKE_VERBOSE} -j $(nproc) -t test_individually || echo failed > build/test_status.txt + cmake --build cmake-build-docker ${CMAKE_VERBOSE} -j`nproc` -t test || echo failed > build/test_status.txt fi diff --git a/dev-tools/docker_test.sh b/dev-tools/docker_test.sh index 22a46f4c9f..aed18fa609 100755 --- a/dev-tools/docker_test.sh +++ b/dev-tools/docker_test.sh @@ -92,10 +92,7 @@ do # Using tar to copy the build and test artifacts out of the container seems # more reliable than docker cp, and also means the files end up with the # correct uid/gid - docker run --rm --workdir=/ml-cpp $TEMP_TAG bash -c "find . \( $EXTRACT_FIND \) -print0 | tar cf - $EXTRACT_EXPLICIT --null -T -" | tar xvf - - if [ $? != 0 ]; then - echo "Copying build and test artifacts from docker container failed" - fi + docker run --rm --workdir=/ml-cpp $TEMP_TAG bash -c "find . $EXTRACT_FIND | xargs tar cf - $EXTRACT_EXPLICIT && sleep 30" | tar xvf - docker rmi --force $TEMP_TAG # The image build is set to return zero (i.e. succeed as far as Docker is # concerned) when the only problem is that the unit tests fail, as this diff --git a/lib/api/unittest/CMultiFileDataAdderTest.cc b/lib/api/unittest/CMultiFileDataAdderTest.cc index f03dd81fd6..96231c5679 100644 --- a/lib/api/unittest/CMultiFileDataAdderTest.cc +++ b/lib/api/unittest/CMultiFileDataAdderTest.cc @@ -37,8 +37,6 @@ #include #include #include -#include // For random number generation facilities -#include #include #include @@ -102,16 +100,7 @@ void detectorPersistHelper(const std::string& configFileName, // Persist the detector state to file(s) - // Create a random number to use to generate a unique file name for each test - // this allows tests to be run successfully in parallel - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<> distrib(1, 100); - std::ostringstream oss; - oss << distrib(gen); - - std::string baseOrigOutputFilename(ml::test::CTestTmpDir::tmpDir() + - "/orig_" + oss.str()); + std::string baseOrigOutputFilename(ml::test::CTestTmpDir::tmpDir() + "/orig"); { // Clean up any leftovers of previous failures boost::filesystem::path origDir(baseOrigOutputFilename); diff --git a/lib/core/unittest/CLoggerTest.cc b/lib/core/unittest/CLoggerTest.cc index e626fb8fcd..8667aa1430 100644 --- a/lib/core/unittest/CLoggerTest.cc +++ b/lib/core/unittest/CLoggerTest.cc @@ -49,13 +49,12 @@ class CTestFixture { } }; -std::function makeReader(std::ostringstream& loggedData, const std::string& pipeName) { - return [&loggedData, &pipeName]() { - +std::function makeReader(std::ostringstream& loggedData) { + return [&loggedData] { for (std::size_t attempt = 1; attempt <= 100; ++attempt) { // wait a bit so that pipe has been created std::this_thread::sleep_for(std::chrono::milliseconds(50)); - std::ifstream strm(pipeName); + std::ifstream strm(TEST_PIPE_NAME); if (strm.is_open()) { std::copy(std::istreambuf_iterator(strm), std::istreambuf_iterator(), @@ -63,7 +62,7 @@ std::function makeReader(std::ostringstream& loggedData, const std::stri return; } } - BOOST_FAIL("Failed to connect to logging pipe " + pipeName + " within a reasonable time"); + BOOST_TEST_CHECK(false, "Failed to connect to logging pipe within a reasonable time"); }; } @@ -205,13 +204,12 @@ BOOST_FIXTURE_TEST_CASE(testNonAsciiJsonLogging, CTestFixture) { "Non-iso8859-15: 编码 test", "surrogate pair: 𐐷 test"}; std::ostringstream loggedData; - const std::string& pipeName = std::string{TEST_PIPE_NAME} + "_testNonAsciiJsonLogging"; - std::thread reader(makeReader(loggedData, pipeName)); + std::thread reader(makeReader(loggedData)); ml::core::CLogger& logger = ml::core::CLogger::instance(); // logger might have been reconfigured in previous tests, so reset and reconfigure it logger.reset(); - logger.reconfigure(pipeName, ""); + logger.reconfigure(TEST_PIPE_NAME, ""); for (const auto& m : messages) { LOG_INFO(<< m); @@ -227,16 +225,14 @@ BOOST_FIXTURE_TEST_CASE(testNonAsciiJsonLogging, CTestFixture) { BOOST_FIXTURE_TEST_CASE(testWarnAndErrorThrottling, CTestFixture) { std::ostringstream loggedData; - const std::string& pipeName = std::string{TEST_PIPE_NAME} + "_testWarnAndErrorThrottling"; - - std::thread reader{makeReader(loggedData, pipeName)}; + std::thread reader{makeReader(loggedData)}; TStrVec messages{"Warn should only be seen once", "Error should only be seen once"}; ml::core::CLogger& logger = ml::core::CLogger::instance(); // logger might have been reconfigured in previous tests, so reset and reconfigure it logger.reset(); - logger.reconfigure(pipeName, ""); + logger.reconfigure(TEST_PIPE_NAME, ""); for (std::size_t i = 0; i < 10; ++i) { LOG_WARN(<< messages[0]); diff --git a/lib/core/unittest/CNamedPipeFactoryTest.cc b/lib/core/unittest/CNamedPipeFactoryTest.cc index ad0436c999..6ad24c5829 100644 --- a/lib/core/unittest/CNamedPipeFactoryTest.cc +++ b/lib/core/unittest/CNamedPipeFactoryTest.cc @@ -38,9 +38,9 @@ const std::size_t MAX_ATTEMPTS{100}; const std::size_t TEST_SIZE{10000}; const char TEST_CHAR{'a'}; #ifdef Windows -const std::string TEST_PIPE_NAME{"\\\\.\\pipe\\testpipe"}; +const char* const TEST_PIPE_NAME{"\\\\.\\pipe\\testpipe"}; #else -const std::string TEST_PIPE_NAME{"testfiles/testpipe"}; +const char* const TEST_PIPE_NAME{"testfiles/testpipe"}; #endif class CThreadBlockCanceller : public ml::core::CThread { @@ -71,13 +71,13 @@ class CThreadBlockCanceller : public ml::core::CThread { } BOOST_AUTO_TEST_CASE(testServerIsCppReader) { - const std::string pipeName = TEST_PIPE_NAME + "_testServerIsCppReader"; - ml::test::CThreadDataWriter threadWriter{SLEEP_TIME_MS, pipeName, TEST_CHAR, TEST_SIZE}; + ml::test::CThreadDataWriter threadWriter{SLEEP_TIME_MS, TEST_PIPE_NAME, + TEST_CHAR, TEST_SIZE}; BOOST_TEST_REQUIRE(threadWriter.start()); std::atomic_bool dummy{false}; ml::core::CNamedPipeFactory::TIStreamP strm{ - ml::core::CNamedPipeFactory::openPipeStreamRead(pipeName, dummy)}; + ml::core::CNamedPipeFactory::openPipeStreamRead(TEST_PIPE_NAME, dummy)}; BOOST_TEST_REQUIRE(strm); static const std::streamsize BUF_SIZE{512}; @@ -100,14 +100,13 @@ BOOST_AUTO_TEST_CASE(testServerIsCppReader) { } BOOST_AUTO_TEST_CASE(testServerIsCReader) { - const std::string pipeName = TEST_PIPE_NAME + "_testServerIsCReader"; - - ml::test::CThreadDataWriter threadWriter{SLEEP_TIME_MS, pipeName, TEST_CHAR, TEST_SIZE}; + ml::test::CThreadDataWriter threadWriter{SLEEP_TIME_MS, TEST_PIPE_NAME, + TEST_CHAR, TEST_SIZE}; BOOST_TEST_REQUIRE(threadWriter.start()); std::atomic_bool dummy{false}; ml::core::CNamedPipeFactory::TFileP file{ - ml::core::CNamedPipeFactory::openPipeFileRead(pipeName, dummy)}; + ml::core::CNamedPipeFactory::openPipeFileRead(TEST_PIPE_NAME, dummy)}; BOOST_TEST_REQUIRE(file); static const std::size_t BUF_SIZE{512}; @@ -130,14 +129,12 @@ BOOST_AUTO_TEST_CASE(testServerIsCReader) { } BOOST_AUTO_TEST_CASE(testServerIsCppWriter) { - const std::string pipeName = TEST_PIPE_NAME + "_testServerIsCppWriter"; - - ml::test::CThreadDataReader threadReader{PAUSE_TIME_MS, MAX_ATTEMPTS, pipeName}; + ml::test::CThreadDataReader threadReader{PAUSE_TIME_MS, MAX_ATTEMPTS, TEST_PIPE_NAME}; BOOST_TEST_REQUIRE(threadReader.start()); std::atomic_bool dummy{false}; ml::core::CNamedPipeFactory::TOStreamP strm{ - ml::core::CNamedPipeFactory::openPipeStreamWrite(pipeName, dummy)}; + ml::core::CNamedPipeFactory::openPipeStreamWrite(TEST_PIPE_NAME, dummy)}; BOOST_TEST_REQUIRE(strm); std::size_t charsLeft{TEST_SIZE}; @@ -162,14 +159,12 @@ BOOST_AUTO_TEST_CASE(testServerIsCppWriter) { } BOOST_AUTO_TEST_CASE(testServerIsCWriter) { - const std::string pipeName = TEST_PIPE_NAME + "_testServerIsCWriter"; - - ml::test::CThreadDataReader threadReader{PAUSE_TIME_MS, MAX_ATTEMPTS, pipeName}; + ml::test::CThreadDataReader threadReader{PAUSE_TIME_MS, MAX_ATTEMPTS, TEST_PIPE_NAME}; BOOST_TEST_REQUIRE(threadReader.start()); std::atomic_bool dummy{false}; ml::core::CNamedPipeFactory::TFileP file{ - ml::core::CNamedPipeFactory::openPipeFileWrite(pipeName, dummy)}; + ml::core::CNamedPipeFactory::openPipeFileWrite(TEST_PIPE_NAME, dummy)}; BOOST_TEST_REQUIRE(file); sleep(1); @@ -199,14 +194,14 @@ BOOST_AUTO_TEST_CASE(testCancelBlock) { BOOST_TEST_REQUIRE(cancellerThread.start()); ml::core::CNamedPipeFactory::TOStreamP strm{ml::core::CNamedPipeFactory::openPipeStreamWrite( - TEST_PIPE_NAME + "_testCancelBlock", cancellerThread.hasCancelledBlockingCall())}; + TEST_PIPE_NAME, cancellerThread.hasCancelledBlockingCall())}; BOOST_TEST_REQUIRE(strm == nullptr); BOOST_TEST_REQUIRE(cancellerThread.stop()); } BOOST_AUTO_TEST_CASE(testErrorIfRegularFile) { - const std::atomic_bool dummy{false}; + std::atomic_bool dummy{false}; ml::core::CNamedPipeFactory::TIStreamP strm{ ml::core::CNamedPipeFactory::openPipeStreamRead("Main.cc", dummy)}; BOOST_TEST_REQUIRE(strm == nullptr); @@ -221,24 +216,23 @@ BOOST_AUTO_TEST_CASE(testErrorIfSymlink) { // Suppress the error about no assertions in this case BOOST_REQUIRE(BOOST_IS_DEFINED(Windows)); #else - const std::string TEST_SYMLINK_NAME{"test_symlink_testErrorIfSymlink"}; - const std::string testPipeName{TEST_PIPE_NAME + "_test_symlink_testErrorIfSymlink"}; + static const char* const TEST_SYMLINK_NAME{"test_symlink"}; // Remove any files left behind by a previous failed test, but don't check // the return codes as these calls will usually fail - ::unlink(TEST_SYMLINK_NAME.c_str()); - ::unlink(testPipeName.c_str()); + ::unlink(TEST_SYMLINK_NAME); + ::unlink(TEST_PIPE_NAME); - BOOST_REQUIRE_EQUAL(0, ::mkfifo(testPipeName.c_str(), S_IRUSR | S_IWUSR)); - BOOST_REQUIRE_EQUAL(0, ::symlink(testPipeName.c_str(), TEST_SYMLINK_NAME.c_str())); + BOOST_REQUIRE_EQUAL(0, ::mkfifo(TEST_PIPE_NAME, S_IRUSR | S_IWUSR)); + BOOST_REQUIRE_EQUAL(0, ::symlink(TEST_PIPE_NAME, TEST_SYMLINK_NAME)); std::atomic_bool dummy{false}; ml::core::CNamedPipeFactory::TIStreamP strm{ ml::core::CNamedPipeFactory::openPipeStreamRead(TEST_SYMLINK_NAME, dummy)}; BOOST_TEST_REQUIRE(strm == nullptr); - BOOST_REQUIRE_EQUAL(0, ::unlink(TEST_SYMLINK_NAME.c_str())); - BOOST_REQUIRE_EQUAL(0, ::unlink(testPipeName.c_str())); + BOOST_REQUIRE_EQUAL(0, ::unlink(TEST_SYMLINK_NAME)); + BOOST_REQUIRE_EQUAL(0, ::unlink(TEST_PIPE_NAME)); #endif } diff --git a/run_tests_as_seperate_processes.sh b/run_tests_as_seperate_processes.sh deleted file mode 100755 index 4208be8fd4..0000000000 --- a/run_tests_as_seperate_processes.sh +++ /dev/null @@ -1,179 +0,0 @@ -#!/bin/bash -# -# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one -# or more contributor license agreements. Licensed under the Elastic License -# 2.0 and the following additional limitation. Functionality enabled by the -# files subject to the Elastic License 2.0 may only be used in production when -# invoked by an Elasticsearch process with a license key installed that permits -# use of machine learning features. You may not use this file except in -# compliance with the Elastic License 2.0 and the foregoing additional -# limitation. -# - -# This script ultimately gets called from within the docker entry point script. -# It provides a wrapper around the call to "cmake" that runs the test cases -# and provides some flexibility as to how the tests should be run in terms of how they -# are spread across processes. This is necessary when trying to isolate the impact memory -# usage of tests have upon one another. -# -# It is intended to be called as part of the CI build/test process but should be able to be run manually. -# -# It should be called with 3 parameters -# cmake_build_dir: The directory that cmake is using for build outputs, i.e. that passed to cmake's --build argument -# cmake_current_binary_dir: The directory containing the current test suite executable e.g. /test/lib/api/unittest -# test_suite: The name of the test suite to run, minus any leading "ml_", e.g. "test_api" -# -# In addition to the required parameters there are several environment variables that control the script's behaviour -# BOOST_TEST_MAX_ARGS: The maximum number of test cases to be passed off to a sub shell -# BOOST_TEST_MAX_PROCS: The maximum number of sub shells to use -# BOOST_TEST_MIXED_MODE: If set to "true" then rather than iterating over each individual test passed to a sub-shell -# run them all in the same BOOST test executable process. -# -# Design decisions: The script relies upon the simplest tools available on most unix like platforms - bash, sed and -# awk (the awk script does not use any GNU extensions for maximum portability). This is to keep the number of dependencies -# required by CI build images to a minimum (so e.g. no python etc.) - -if [ $# -lt 3 ]; then - echo "Usage: $0 " - echo "e.g.: $0 ${CPP_SRC_HOME}/cmake-build-relwithdebinfo-local ${CPP_SRC_HOME}/cmake-build-relwithdebinfo-local/test/lib/api/unittest test_api" - exit -fi - -export BUILD_DIR=$( echo $1 | sed 's|/$||' ) -export BINARY_DIR=$( echo $2 | sed 's|/$||' ) -export TEST_SUITE=$3 - -TEST_DIR=${CPP_SRC_HOME}/$(echo $BINARY_DIR | sed -e "s|$BUILD_DIR/test/||" -e 's|unittest.*|unittest|') - -export TEST_EXECUTABLE="$2/ml_$3" -export LOG_DIR="$2/test_logs" - -function num_procs() { - if [ `uname` = "Darwin" ]; then - sysctl -n hw.logicalcpu - else - nproc - fi -} - -MAX_ARGS=1 -MAX_PROCS=$(num_procs) - -if [[ -n "$BOOST_TEST_MAX_ARGS" ]]; then - MAX_ARGS=$BOOST_TEST_MAX_ARGS -fi - -if [[ -n "$BOOST_TEST_MAX_PROCS" ]]; then - MAX_PROCS=$BOOST_TEST_MAX_PROCS -fi - -rm -rf "$LOG_DIR" -mkdir -p "$LOG_DIR" - -function get_qualified_test_names() { - executable_path=$1 - - output_lines=$($executable_path --list_content 2>&1) - - while IFS= read -r line; do - match=$(grep -w '^[ ]*C.*Test' <<< "$line"); - if [ $? -eq 0 ]; then - suite=$match - continue - fi - match=$(grep -w 'test.*\*$' <<< "$line"); - if [ $? -eq 0 ]; then - case=$(sed 's/[ \*]//g' <<< "$suite/$match") - echo "$case" - fi - done <<< "$output_lines" -} - -# get the fully qualified test names -echo "Discovering tests..." -ALL_TEST_NAMES=$(get_qualified_test_names "$TEST_EXECUTABLE") - -if [ -z "$ALL_TEST_NAMES" ]; then - echo "No tests found to run or error in test discovery." - exit 1 -fi - -function execute_tests() { - - if [[ "$BOOST_TEST_MIXED_MODE" == "true" ]]; then - TEST_CASES=$(sed 's/ /:/g' <<< $@) - else - TEST_CASES=$@ - fi - - # Loop through each test - for TEST_NAME in $TEST_CASES; do - echo "--------------------------------------------------" - echo "Running test: $TEST_NAME" - - # Replace slashes and potentially other special chars for a safe filename - SAFE_TEST_LOG_FILENAME=$(echo "$TEST_NAME" | sed 's/[^a-zA-Z0-9_]/_/g' | cut -c-100) - LOG_FILE="$LOG_DIR/${SAFE_TEST_LOG_FILENAME}.log" - - # Execute the test in a separate process - TESTS=$TEST_NAME cmake --build $BUILD_DIR -t $TEST_SUITE > "$LOG_FILE" 2>&1 - TEST_STATUS=$? - - if [ $TEST_STATUS -eq 0 ]; then - echo "Test '$TEST_NAME' PASSED." - else - echo "Test '$TEST_NAME' FAILED with exit code $TEST_STATUS. Check '$LOG_FILE' for details." - fi - done -} - -export -f execute_tests - -RESULT=$(echo $ALL_TEST_NAMES | xargs -n $MAX_ARGS -P $MAX_PROCS bash -c 'execute_tests "$@"' _) - -echo "--------------------------------------------------" - -grep 'FAILED with exit code' <<< $RESULT -if [ $? -eq 0 ] -then - echo "$TEST_SUITE: Some individual tests FAILED. Check logs in '$LOG_DIR'." - echo found -else - echo "$TEST_SUITE: All individual tests PASSED." -fi - -function merge_junit_results() { - JUNIT_FILES="$@" - echo "" - cat $JUNIT_FILES | \ - awk ' - BEGIN{tests=0; skipped=0; errors=0; failures=0; id=""; time=0.0; name=""} - $0 ~ /"}' - - cat $JUNIT_FILES | sed -e '/xml/d' -e '/testsuite/d' -e '//{H;d;};x;/skipped/d' | grep '.' -echo "" -echo -} - -if [[ $BOOST_TEST_OUTPUT_FORMAT_FLAGS =~ junit ]]; then - merge_junit_results $TEST_DIR/boost_test_results_C*.junit > $TEST_DIR/boost_test_results.junit -fi - diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index b96518a8a9..1877e64b5e 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -31,17 +31,8 @@ add_custom_target(run_tests DEPENDS clean_test_results ${ML_TEST_DEPENDS} WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} ) -add_custom_target(run_tests_individually - DEPENDS clean_test_results ${ML_TEST_INDIVIDUALLY_DEPENDS} - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} -) add_custom_target(test DEPENDS run_tests COMMAND ${CMAKE_COMMAND} -DTEST_DIR=${CMAKE_BINARY_DIR} -P ${CMAKE_SOURCE_DIR}/cmake/test-check-success.cmake WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} ) -add_custom_target(test_individually - DEPENDS run_tests_individually - COMMAND ${CMAKE_COMMAND} -DTEST_DIR=${CMAKE_BINARY_DIR} -P ${CMAKE_SOURCE_DIR}/cmake/test-check-success.cmake - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} -) \ No newline at end of file From a536d4619e0de7a940523b8cf7c2bce39fde55ac Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Mon, 28 Jul 2025 11:27:24 +1200 Subject: [PATCH 48/49] Remove unneded sleep in test case --- lib/core/unittest/CNamedPipeFactoryTest.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/lib/core/unittest/CNamedPipeFactoryTest.cc b/lib/core/unittest/CNamedPipeFactoryTest.cc index 6ad24c5829..39aef5e07e 100644 --- a/lib/core/unittest/CNamedPipeFactoryTest.cc +++ b/lib/core/unittest/CNamedPipeFactoryTest.cc @@ -167,7 +167,6 @@ BOOST_AUTO_TEST_CASE(testServerIsCWriter) { ml::core::CNamedPipeFactory::openPipeFileWrite(TEST_PIPE_NAME, dummy)}; BOOST_TEST_REQUIRE(file); - sleep(1); std::size_t charsLeft{TEST_SIZE}; std::size_t blockSize{7}; while (charsLeft > 0) { From 0a6e2b11f771ee06a28d30c25f6184b14403a2c5 Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Thu, 31 Jul 2025 12:22:41 +1200 Subject: [PATCH 49/49] Update changelog --- docs/CHANGELOG.asciidoc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/CHANGELOG.asciidoc b/docs/CHANGELOG.asciidoc index aeb1f7748c..3d8a09fc68 100644 --- a/docs/CHANGELOG.asciidoc +++ b/docs/CHANGELOG.asciidoc @@ -28,12 +28,17 @@ //=== Regressions +== {es} version 9.2.0 + +=== Enhancements + +* Report the actual memory usage of the autodetect process. (See {ml-pull}2846[#2846]) + == {es} version 9.1.0 === Enhancements * Track memory used in the hierarchical results normalizer. (See {ml-pull}2831[#2831].) -* Report the actual memory usage of the autodetect process. (See {ml-pull}2846[#2846]) === Bug Fixes