Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
55 commits
Select commit Hold shift + click to select a range
a7b1ce2
[ML] Report the "actual" memory usage of the autodetect process
edsavage Apr 4, 2025
cb957ca
Update changelog
edsavage Apr 4, 2025
8f73f02
Formatting
edsavage Apr 4, 2025
d3a39ae
Appease SonarQube
edsavage Apr 7, 2025
e2d1bf5
Tweak unit test for platform portability
edsavage Apr 7, 2025
1a9a99a
Attend to review comments
edsavage Apr 9, 2025
5ae22cb
Update bin/autodetect/Main.cc
edsavage Apr 10, 2025
fe6f1fa
Update include/model/CResourceMonitor.h
edsavage Apr 10, 2025
582430e
Update include/model/CResourceMonitor.h
edsavage Apr 10, 2025
9476ede
Attend to review comments
edsavage Apr 11, 2025
e29d8f7
Merge remote-tracking branch 'origin/ad_real_mem_usage' into ad_real_…
edsavage Apr 11, 2025
475fef1
Formatting
edsavage Apr 11, 2025
fc8888c
Merge branch 'main' of github.com:elastic/ml-cpp into ad_real_mem_usage
edsavage May 23, 2025
6945d3f
Attend to failing unit tests (hopefully)
edsavage May 27, 2025
3b69b72
Formatting.. grr
edsavage May 27, 2025
1bafa8c
On Linux only, use the value of the system memory usage (max resident…
edsavage Jun 4, 2025
efc311b
Fix copyright headers
edsavage Jun 4, 2025
6be6395
Nits in test code
edsavage Jun 4, 2025
134a494
Attend to code review comments
edsavage Jun 9, 2025
ed426ac
Formatting
edsavage Jun 9, 2025
4c8bf8b
Attend to code review comments
edsavage Jun 11, 2025
fa9c4fa
Small tidy up of CProcessMemoryUsage.cc
edsavage Jun 12, 2025
014be1c
Small tidy up
edsavage Jun 12, 2025
41c02de
On Linux, return system memory (max resident set size) from CResource…
edsavage Jun 17, 2025
50c535f
[ML] Add a script to run each unit test separately
edsavage Jun 27, 2025
1c5c46e
Slight tweak to reduce load on linux builds
edsavage Jun 29, 2025
0109cce
Replace use of python script with bash function
edsavage Jun 30, 2025
01c1e6b
Pass all JUNIT result files to test collector
edsavage Jun 30, 2025
e2d2d17
Better reporting of unit test results
edsavage Jul 2, 2025
7904e76
Slight tweak to parameters to test script
edsavage Jul 2, 2025
185a0e0
Tweaks and typos
edsavage Jul 2, 2025
0a6a086
Portability fixes
edsavage Jul 3, 2025
6c97572
Fix failing test cases
edsavage Jul 3, 2025
40f522a
Merge branch 'main' of github.com:elastic/ml-cpp into run_tests_indiv…
edsavage Jul 3, 2025
733b038
Formatting
edsavage Jul 3, 2025
62dce12
Tidy up of scripts
edsavage Jul 4, 2025
55875c0
Further tidy up of scripts
edsavage Jul 4, 2025
9c67353
Tweaks for linux aarch64
edsavage Jul 4, 2025
d265102
Tweak for linux aarch64 seccomp test
edsavage Jul 6, 2025
2cf5752
Typo
edsavage Jul 6, 2025
3ace6f4
Better isolation of tests
edsavage Jul 8, 2025
4014a72
Formatting
edsavage Jul 8, 2025
e3d2f8a
Formatting
edsavage Jul 8, 2025
7b39899
Reworking test case
edsavage Jul 8, 2025
58fafb4
Small tidy up
edsavage Jul 9, 2025
033adef
Minor tweaks to test runner script
edsavage Jul 11, 2025
fe71d2a
typo
edsavage Jul 13, 2025
e8c00e7
Merge branch 'main' of github.com:elastic/ml-cpp into ad_real_mem_usage
edsavage Jul 14, 2025
13fa5cf
Merge branch 'run_tests_individually' of github.com:edsavage/ml-cpp i…
edsavage Jul 14, 2025
bbe25b1
Set both adjusted usage and adjusted peak usage to system memory.
edsavage Jul 25, 2025
3ab09ba
Formatting
edsavage Jul 25, 2025
39b4f3b
Revert "Merge branch 'run_tests_individually' of github.com:edsavage/…
edsavage Jul 27, 2025
a536d46
Remove unneeded sleep in test case
edsavage Jul 27, 2025
0a6e2b1
Update changelog
edsavage Jul 31, 2025
befb881
Merge branch 'main' into ad_real_mem_usage
edsavage Jul 31, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion bin/autodetect/Main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <core/CJsonOutputStreamWrapper.h>
#include <core/CLogger.h>
#include <core/CProcessPriority.h>
#include <core/CProcessStats.h>
#include <core/CProgramCounters.h>
#include <core/CStringUtils.h>
#include <core/CoreTypes.h>
Expand Down Expand Up @@ -83,7 +84,9 @@ int main(int argc, char** argv) {
ml::counter_t::E_TSADNumberMemoryLimitModelCreationFailures,
ml::counter_t::E_TSADNumberPrunedItems,
ml::counter_t::E_TSADAssignmentMemoryBasis,
ml::counter_t::E_TSADOutputMemoryAllocatorUsage};
ml::counter_t::E_TSADOutputMemoryAllocatorUsage,
ml::counter_t::E_TSADSystemMemoryUsage,
ml::counter_t::E_TSADMaxSystemMemoryUsage};

ml::core::CProgramCounters::registerProgramCounterTypes(counters);

Expand Down Expand Up @@ -151,6 +154,8 @@ int main(int argc, char** argv) {
}
cancellerThread.stop();

LOG_DEBUG(<< "Max Resident Set Size: " << ml::core::CProcessStats::maxResidentSetSize());
LOG_DEBUG(<< "Resident Set Size: " << ml::core::CProcessStats::residentSetSize());
// Log the program version immediately after reconfiguring the logger. This
// must be done from the program, and NOT a shared library, as each program
// statically links its own version library.
Expand Down
6 changes: 6 additions & 0 deletions docs/CHANGELOG.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@

//=== Regressions

== {es} version 9.2.0

=== Enhancements

* Report the actual memory usage of the autodetect process. (See {ml-pull}2846[#2846])

== {es} version 9.1.0

=== Enhancements
Expand Down
12 changes: 11 additions & 1 deletion include/core/CProgramCounters.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,12 @@ enum ECounterTypes {
//! The memory currently used by the allocators to output JSON documents, in bytes.
E_TSADOutputMemoryAllocatorUsage = 30,

//! The resident set size of the process, in bytes.
E_TSADSystemMemoryUsage = 31,

//! The maximum resident set size of the process, in bytes.
E_TSADMaxSystemMemoryUsage = 32,

// Data Frame Outlier Detection

//! The estimated peak memory usage for outlier detection in bytes
Expand Down Expand Up @@ -146,7 +152,7 @@ enum ECounterTypes {
// Add any new values here

//! This MUST be last, increment the value for every new enum added
E_LastEnumCounter = 31
E_LastEnumCounter = 33
};

static constexpr std::size_t NUM_COUNTERS = static_cast<std::size_t>(E_LastEnumCounter);
Expand Down Expand Up @@ -355,6 +361,10 @@ class CORE_EXPORT CProgramCounters {
"Which option is being used to get model memory for node assignment?"},
{counter_t::E_TSADOutputMemoryAllocatorUsage, "E_TSADOutputMemoryAllocatorUsage",
"The amount of memory used to output JSON documents, in bytes."},
{counter_t::E_TSADSystemMemoryUsage, "E_TSADSystemMemoryUsage",
"The amount of system memory used by the process, in bytes"},
{counter_t::E_TSADMaxSystemMemoryUsage, "E_TSADMaxSystemMemoryUsage",
"The maximum amount of system memory used by the process, in bytes"},
{counter_t::E_DFOEstimatedPeakMemoryUsage, "E_DFOEstimatedPeakMemoryUsage",
"The upfront estimate of the peak memory outlier detection would use"},
{counter_t::E_DFOPeakMemoryUsage, "E_DFOPeakMemoryUsage", "The peak memory outlier detection used"},
Expand Down
40 changes: 40 additions & 0 deletions include/model/CProcessMemoryUsage.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the following additional limitation. Functionality enabled by the
* files subject to the Elastic License 2.0 may only be used in production when
* invoked by an Elasticsearch process with a license key installed that permits
* use of machine learning features. You may not use this file except in
* compliance with the Elastic License 2.0 and the foregoing additional
* limitation.
*/

#ifndef INCLUDED_ml_model_CProcessMemoryUsage_h
#define INCLUDED_ml_model_CProcessMemoryUsage_h

#include <model/ImportExport.h>

#include <cstddef>

namespace ml {
namespace model {

//! \brief Determines how to calculate the memory used by the current process.
//!
//! DESCRIPTION:\n
//! Determines how to calculate the memory used by the current process based on the operating system.
//! On some operating systems (Mac, Windows) we use the estimated memory usage of the models,
//! while on others (Linux) we use the actual memory of the process as provided by system calls.
//!
//! IMPLEMENTATION DECISIONS:\n
//! The class is a pure holder for a platform dependent constant and cannot be
//! instantiated. The value of MEMORY_STRATEGY is supplied by exactly one
//! platform specific source file.
class MODEL_EXPORT CProcessMemoryUsage {
public:
    //! The ways in which the memory usage of the process may be determined.
    enum class EMemoryStrategy {
        //! Use the estimated memory usage of the models.
        E_Estimated,
        //! Use the actual memory of the process as provided by system calls.
        E_System
    };

    //! The strategy in force on the current platform.
    static const EMemoryStrategy MEMORY_STRATEGY;

public:
    //! Not instantiable: the class only exposes static data.
    CProcessMemoryUsage() = delete;
};
}
}

#endif // INCLUDED_ml_model_CProcessMemoryUsage_h
9 changes: 9 additions & 0 deletions include/model/CResourceMonitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,12 @@ class MODEL_EXPORT CResourceMonitor {
//! Returns the sum of used memory plus any extra memory
std::size_t totalMemory() const;

//! Returns the current physical memory of the process (rss) as reported by the system
static std::size_t systemMemory();

//! Returns the maximum physical memory of the process (max rss) as reported by the system
static std::size_t maxSystemMemory();

private:
using TMonitoredResourcePtrSizeUMap =
boost::unordered_map<CMonitoredResource*, std::size_t>;
Expand Down Expand Up @@ -229,6 +235,9 @@ class MODEL_EXPORT CResourceMonitor {
//! Returns the amount by which reported memory usage is scaled depending on the type of persistence in use
std::size_t persistenceMemoryIncreaseFactor() const;

//! Modify the supplied usage value depending on a platform dependent strategy.
std::size_t applyMemoryStrategy(std::size_t usage) const;

private:
//! The registered collection of components
TMonitoredResourcePtrSizeUMap m_Resources;
Expand Down
4 changes: 4 additions & 0 deletions lib/api/CAnomalyJob.cc
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,10 @@ bool CAnomalyJob::handleRecord(const TStrStrUMap& dataRowFields, TOptionalTime t
}

++core::CProgramCounters::counter(counter_t::E_TSADNumberApiRecordsHandled);
core::CProgramCounters::counter(counter_t::E_TSADSystemMemoryUsage) =
model::CResourceMonitor::systemMemory();
core::CProgramCounters::counter(counter_t::E_TSADMaxSystemMemoryUsage) =
model::CResourceMonitor::maxSystemMemory();

++m_NumRecordsHandled;
m_LatestRecordTime = std::max(m_LatestRecordTime, *time);
Expand Down
2 changes: 2 additions & 0 deletions lib/api/CModelSizeStatsJsonWriter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ const std::string JOB_ID{"job_id"};
const std::string MODEL_SIZE_STATS{"model_size_stats"};
const std::string MODEL_BYTES{"model_bytes"};
const std::string PEAK_MODEL_BYTES{"peak_model_bytes"};
const std::string SYSTEM_MEMORY_BYTES{"system_memory_bytes"};
const std::string MAX_SYSTEM_MEMORY_BYTES{"max_system_memory_bytes"};
Comment on lines +28 to +29
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAICS, these keys are not used anymore.

const std::string MODEL_BYTES_EXCEEDED{"model_bytes_exceeded"};
const std::string MODEL_BYTES_MEMORY_LIMIT{"model_bytes_memory_limit"};
const std::string TOTAL_BY_FIELD_COUNT{"total_by_field_count"};
Expand Down
4 changes: 2 additions & 2 deletions lib/api/unittest/CAnomalyJobLimitTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
* limitation.
*/
#include <core/CJsonOutputStreamWrapper.h>
#include <core/CProcessStats.h>
#include <core/CoreTypes.h>

#include <maths/common/CIntegerTools.h>
Expand Down Expand Up @@ -105,8 +106,6 @@ BOOST_AUTO_TEST_CASE(testAccuracy) {
core::CJsonOutputStreamWrapper wrappedOutputStream(outputStrm);

model::CLimits limits;
//limits.resourceMonitor().m_ByteLimitHigh = 100000;
//limits.resourceMonitor().m_ByteLimitLow = 90000;

{
LOG_TRACE(<< "Setting up job");
Expand All @@ -129,6 +128,7 @@ BOOST_AUTO_TEST_CASE(testAccuracy) {
nonLimitedUsage = limits.resourceMonitor().totalMemory();
}
}
LOG_DEBUG(<< "nonLimitedUsage: " << nonLimitedUsage);
{
// Now run the data with limiting
ml::api::CAnomalyJobConfig jobConfig = CTestAnomalyJob::makeSimpleJobConfig(
Expand Down
4 changes: 2 additions & 2 deletions lib/api/unittest/CJsonOutputWriterTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1733,7 +1733,7 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) {
resourceUsage.s_OverFields = 7;
resourceUsage.s_AllocationFailures = 8;
resourceUsage.s_MemoryStatus = ml::model_t::E_MemoryStatusHardLimit;
resourceUsage.s_AssignmentMemoryBasis = ml::model_t::E_AssignmentBasisCurrentModelBytes;
resourceUsage.s_AssignmentMemoryBasis = ml::model_t::E_AssignmentBasisPeakModelBytes;
resourceUsage.s_BucketStartTime = 9;
resourceUsage.s_BytesExceeded = 10;
resourceUsage.s_BytesMemoryLimit = 11;
Expand Down Expand Up @@ -1785,7 +1785,7 @@ BOOST_AUTO_TEST_CASE(testReportMemoryUsage) {
BOOST_TEST_REQUIRE(sizeStats.contains("memory_status"));
BOOST_REQUIRE_EQUAL("hard_limit", sizeStats.at("memory_status").as_string());
BOOST_TEST_REQUIRE(sizeStats.contains("assignment_memory_basis"));
BOOST_REQUIRE_EQUAL("current_model_bytes",
BOOST_REQUIRE_EQUAL("peak_model_bytes",
sizeStats.at("assignment_memory_basis").as_string());
BOOST_TEST_REQUIRE(sizeStats.contains("log_time"));
std::int64_t nowMs{ml::core::CTimeUtils::nowMs()};
Expand Down
3 changes: 2 additions & 1 deletion lib/core/CProcessStats_MacOSX.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,10 @@
* compliance with the Elastic License 2.0 and the foregoing additional
* limitation.
*/
#include <core/CLogger.h>
#include <core/CProcessStats.h>

#include <core/CLogger.h>

#include <errno.h>
#include <fcntl.h>
#include <sys/resource.h>
Expand Down
4 changes: 3 additions & 1 deletion lib/core/CProcessStats_Windows.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@
* compliance with the Elastic License 2.0 and the foregoing additional
* limitation.
*/
#include <core/CLogger.h>
#include <core/CProcessStats.h>

#include <core/CLogger.h>
#include <core/CWindowsError.h>

#include <core/WindowsSafe.h>
Expand All @@ -36,6 +37,7 @@ std::size_t CProcessStats::maxResidentSetSize() {
LOG_DEBUG(<< "Failed to retrieve memory info " << CWindowsError());
return 0;
}

return static_cast<std::size_t>(stats.PeakWorkingSetSize);
}
}
Expand Down
1 change: 1 addition & 0 deletions lib/model/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ ml_add_library(MlModel SHARED
CSampleCounts.cc
CSearchKey.cc
CSimpleCountDetector.cc
CProcessMemoryUsage.cc
CTokenListCategory.cc
CTokenListDataCategorizerBase.cc
CTokenListReverseSearchCreator.cc
Expand Down
20 changes: 20 additions & 0 deletions lib/model/CProcessMemoryUsage.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the following additional limitation. Functionality enabled by the
* files subject to the Elastic License 2.0 may only be used in production when
* invoked by an Elasticsearch process with a license key installed that permits
* use of machine learning features. You may not use this file except in
* compliance with the Elastic License 2.0 and the foregoing additional
* limitation.
*/

#include <model/CProcessMemoryUsage.h>

namespace ml {
namespace model {
// Platforms other than Linux report the estimated size of the models as the
// process memory usage.
const CProcessMemoryUsage::EMemoryStrategy CProcessMemoryUsage::MEMORY_STRATEGY =
    CProcessMemoryUsage::EMemoryStrategy::E_Estimated;
}
}
21 changes: 21 additions & 0 deletions lib/model/CProcessMemoryUsage_Linux.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the following additional limitation. Functionality enabled by the
* files subject to the Elastic License 2.0 may only be used in production when
* invoked by an Elasticsearch process with a license key installed that permits
* use of machine learning features. You may not use this file except in
* compliance with the Elastic License 2.0 and the foregoing additional
* limitation.
*/

#include <model/CProcessMemoryUsage.h>

namespace ml {
namespace model {

// Linux reports the memory usage of the process as determined by the OS.
const CProcessMemoryUsage::EMemoryStrategy CProcessMemoryUsage::MEMORY_STRATEGY =
    CProcessMemoryUsage::EMemoryStrategy::E_System;
}
}
33 changes: 30 additions & 3 deletions lib/model/CResourceMonitor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,15 @@

#include <core/CLogger.h>
#include <core/CMemoryDef.h>
#include <core/CProcessStats.h>
#include <core/CProgramCounters.h>
#include <core/Constants.h>

#include <maths/common/CMathsFuncs.h>
#include <maths/common/CTools.h>

#include <model/CMonitoredResource.h>
#include <model/CProcessMemoryUsage.h>

#include <algorithm>
#include <cmath>
Expand Down Expand Up @@ -378,11 +380,13 @@ CResourceMonitor::SModelSizeStats
CResourceMonitor::createMemoryUsageReport(core_t::TTime bucketStartTime) {
SModelSizeStats res;
res.s_Usage = this->totalMemory();
res.s_AdjustedUsage = this->adjustedUsage(res.s_Usage);
res.s_PeakUsage = static_cast<std::size_t>(
core::CProgramCounters::counter(counter_t::E_TSADPeakMemoryUsage));
res.s_AdjustedPeakUsage = this->adjustedUsage(res.s_PeakUsage);
res.s_BytesMemoryLimit = this->persistenceMemoryIncreaseFactor() * m_ByteLimitHigh;
// On Linux both adjusted usage and adjusted peak usage are set to system memory usage (max resident set size)
// These are the values reported back to the Java process, they are not used for any other purpose.
res.s_AdjustedUsage = this->applyMemoryStrategy(this->adjustedUsage(res.s_Usage));
res.s_AdjustedPeakUsage = this->applyMemoryStrategy(this->adjustedUsage(res.s_PeakUsage));
res.s_BytesMemoryLimit = this->getBytesMemoryLimit();
res.s_BytesExceeded = m_CurrentBytesExceeded;
res.s_MemoryStatus = m_MemoryStatus;
std::uint64_t assignmentMemoryBasis{
Expand All @@ -400,6 +404,22 @@ CResourceMonitor::createMemoryUsageReport(core_t::TTime bucketStartTime) {
return res;
}

// Translate the supplied usage figure into the value to report, according to
// the platform's memory strategy:
//  - E_Estimated: the supplied usage is passed through untouched.
//  - E_System: the supplied usage is ignored and the maximum resident set
//    size of the process is reported instead.
// An unrecognised strategy is logged and yields zero.
std::size_t CResourceMonitor::applyMemoryStrategy(std::size_t usage) const {
    switch (CProcessMemoryUsage::MEMORY_STRATEGY) {
    case CProcessMemoryUsage::EMemoryStrategy::E_Estimated:
        return usage;
    case CProcessMemoryUsage::EMemoryStrategy::E_System:
        return core::CProcessStats::maxResidentSetSize();
    default:
        LOG_WARN(<< "Unknown memory strategy");
        return 0;
    }
}

std::size_t CResourceMonitor::adjustedUsage(std::size_t usage) const {
// We scale the reported memory usage by the inverse of the byte limit margin.
// This gives the user a fairer indication of how close the job is to hitting
Expand Down Expand Up @@ -491,5 +511,12 @@ std::size_t CResourceMonitor::totalMemory() const {
counter_t::E_TSADOutputMemoryAllocatorUsage));
}

// Returns the current resident set size of the process by delegating to
// core::CProcessStats::residentSetSize(). Static: no monitor state is read.
std::size_t CResourceMonitor::systemMemory() {
return core::CProcessStats::residentSetSize();
}

// Returns the maximum resident set size of the process by delegating to
// core::CProcessStats::maxResidentSetSize(). Static: no monitor state is read.
std::size_t CResourceMonitor::maxSystemMemory() {
return core::CProcessStats::maxResidentSetSize();
}
} // model
} // ml
2 changes: 1 addition & 1 deletion lib/model/unittest/CResourceMonitorTest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -536,7 +536,7 @@ BOOST_FIXTURE_TEST_CASE(testExtraMemory, CTestFixture) {
}

BOOST_FIXTURE_TEST_CASE(testPeakUsage, CTestFixture) {
// Clear the counter so that other test cases do not interfere.
// Clear the counters so that other test cases do not interfere.
core::CProgramCounters::counter(counter_t::E_TSADPeakMemoryUsage) = 0;

CLimits limits;
Expand Down