Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions agents/grpc/proto/reconfigure.proto
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ message ReconfigureBody {
optional bool tracingEnabled = 10;
optional uint32 tracingModulesBlacklist = 11;
optional bool contCpuProfile = 12;
optional bool assetsEnabled = 13;
}

message ReconfigureEvent {
Expand Down
26 changes: 20 additions & 6 deletions agents/grpc/src/grpc_agent.cc
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,10 @@ void PopulateReconfigureEvent(grpcagent::ReconfigureEvent* reconfigure_event,
if (it != config.end()) {
body->set_contcpuprofile(*it);
}
it = config.find("assetsEnabled");
if (it != config.end()) {
body->set_assetsenabled(*it);
}
}

void PopulateStartupTimesEvent(grpcagent::StartupTimesEvent* st_events,
Expand Down Expand Up @@ -440,6 +444,7 @@ GrpcAgent::GrpcAgent(): hooks_init_(false),
agent_id_(GetAgentId()),
auth_retries_(0),
unauthorized_(false),
assets_enabled_(true),
profile_on_exit_(false) {
ASSERT_EQ(0, uv_loop_init(&loop_));
ASSERT_EQ(0, uv_cond_init(&start_cond_));
Expand Down Expand Up @@ -1186,12 +1191,12 @@ int GrpcAgent::config(const json& config) {
ret = setup_metrics_timer(period);
}

// uint64_t period = NSOLID_SPANS_FLUSH_INTERVAL;
// auto spans_flush_interval =
// per_process::system_environment->Get(kNSOLID_SPANS_FLUSH_INTERVAL);
// if (spans_flush_interval.has_value()) {
// period = std::stoull(spans_flush_interval.value());
// }
{
auto it = config_.find("assetsEnabled");
if (it != config_.end()) {
assets_enabled_ = *it;
}
}

return ret;
}
Expand Down Expand Up @@ -1747,6 +1752,10 @@ void GrpcAgent::reconfigure(const grpcagent::CommandRequest& request) {
out["contCpuProfile"] = body.contcpuprofile();
}

if (body.has_assetsenabled()) {
out["assetsEnabled"] = body.assetsenabled();
}

DebugJSON("Reconfigure out: \n%s\n", out);

UpdateConfig(out.dump());
Expand Down Expand Up @@ -2084,6 +2093,11 @@ ErrorType GrpcAgent::do_start_prof_init(
const grpcagent::CommandRequest& req,
const ProfileType& type,
ProfileOptions& options) {

if (!assets_enabled_) {
return ErrorType::EAssetsDisabled;
}

Comment on lines +2096 to +2100
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

*Critical: data race on assets_enabled_ (and on config_ for contCpuProfile) when called from _from_js.

start_*_from_js invokes do_start_prof_init on potentially non‑agent threads. Reading a non‑atomic bool and nlohmann::json without synchronization is undefined behavior.

Fix options (pick one):

  • Make flags atomic and stop reading config_ off-thread.
  • Or route the entire do_start_prof_init to the agent loop (enqueue request), returning immediately to JS.

Minimal atomic fix (recommended now), plus JSON type safety:

-  if (!assets_enabled_) {
+  if (!assets_enabled_.load(std::memory_order_acquire)) {
     return ErrorType::EAssetsDisabled;
   }
-  if (type == ProfileType::kCpu) {
-    auto it = config_.find("contCpuProfile");
-    if (it != config_.end() && it->get<bool>() == true) {
-      return ErrorType::EInProgressError;
-    }
-  }
+  if (type == ProfileType::kCpu &&
+      cont_cpu_profile_enabled_.load(std::memory_order_acquire)) {
+    return ErrorType::EInProgressError;
+  }

Header changes (agents/grpc/src/grpc_agent.h):

+#include <atomic>
 ...
-  bool assets_enabled_;
+  std::atomic<bool> assets_enabled_{true};
+  std::atomic<bool> cont_cpu_profile_enabled_{false};

Update config() to snapshot contCpuProfile to atomic:

+  {
+    auto it = config_.find("contCpuProfile");
+    if (it != config_.end()) {
+      if (it->is_boolean()) {
+        cont_cpu_profile_enabled_.store(*it, std::memory_order_relaxed);
+      } else {
+        Debug("Invalid contCpuProfile type; expected boolean\n");
+        cont_cpu_profile_enabled_.store(false, std::memory_order_relaxed);
+      }
+    } else {
+      cont_cpu_profile_enabled_.store(false, std::memory_order_relaxed);
+    }
+  }

Optional: if assetsEnabled should also suppress continuous profiles, add a fast check at the start of got_continuous_profile to drop or close streams when disabled.


🏁 Script executed:

#!/bin/bash
# Locate *_from_js callsites and assess threading context.
rg -nP 'start_(cpu|heap)_(profile|sampling|snapshot)_from_js\s*\(' -C3 --type=cpp
# Grep for direct calls to do_start_prof_init outside agent loop callbacks.
rg -nP '\bdo_start_prof_init\s*\(' -C3 --type=cpp

Length of output: 17670


I need to inspect the actual implementation of do_start_prof_init and check the current state of the code:

Now let me inspect the actual implementation in grpc_agent.cc to see the current state:

Let me now examine the actual implementation in the grpc_agent.cc file around the line numbers indicated:

[/run_scripts]


🏁 Script executed:

#!/bin/bash
# Inspect the actual do_start_prof_init implementation in grpc_agent.cc
sed -n '2092,2130p' agents/grpc/src/grpc_agent.cc

Length of output: 1197


Now let me check the header file to see the current declarations:

[/run_scripts]


🏁 Script executed:

#!/bin/bash
# Check the declaration of assets_enabled_ and related fields in grpc_agent.h
grep -n "assets_enabled_\|cont_cpu_profile\|config_" agents/grpc/src/grpc_agent.h | head -30

Length of output: 279


Let me check the config() method and how config_ is synchronized:

[/run_scripts]


🏁 Script executed:

#!/bin/bash
# Find the config() method and config_ update patterns
rg -n "config_\s*=" agents/grpc/src/grpc_agent.cc -B2 -A2 | head -60
# Also search for config() method
rg -n "void.*config\s*\(" agents/grpc/src/grpc_agent.cc -A5 | head -40

Length of output: 182


Let me find what method contains the config assignment and check for synchronization:

[/run_scripts]


🏁 Script executed:

#!/bin/bash
# Find the method containing config_ assignment at line 1019
sed -n '990,1030p' agents/grpc/src/grpc_agent.cc

# Check for any mutexes in the header
grep -n "mutex\|lock\|Lock" agents/grpc/src/grpc_agent.h

Length of output: 1735


Let me verify one more detail about whether config() can be called concurrently with do_start_prof_init by checking the call sites:

[/run_scripts]


🏁 Script executed:

#!/bin/bash
# Check where config() method is called from
rg -n "\.config\(" agents/grpc/src/grpc_agent.cc -B1 | head -20

# Check if config_msg_cb_ processes config updates asynchronously or on agent loop
sed -n '1250,1350p' agents/grpc/src/grpc_agent.cc | grep -A 30 "config_msg_cb_"

Length of output: 0


Let me try a broader search for how config_msg_cb_ is used:

[/run_scripts]


🏁 Script executed:

#!/bin/bash
# Search for config_msg_cb_ definition and usage
rg -n "config_msg_cb_" agents/grpc/src/grpc_agent.cc -A 15 | head -50

Length of output: 1111


Critical data race confirmed: unprotected config_ access from concurrent threads.

The code modification at line 2097 (if (!assets_enabled_)) exposes a classic data race:

  • config_msg_cb_ (agent event loop thread) writes config_ unsafely at line 1019 via config() method
  • do_start_prof_init (arbitrary thread via *_from_js V8 callbacks) reads config_ unsafely at lines 2110–2115
  • No synchronization between these threads

Additionally, assets_enabled_ is declared std::atomic<bool> but uses implicit operator! conversion instead of explicit .load(std::memory_order_acquire), bypassing memory ordering guarantees.

Fix as suggested in the review comment:

  1. Replace if (!assets_enabled_) with if (!assets_enabled_.load(std::memory_order_acquire))
  2. Add std::atomic<bool> cont_cpu_profile_enabled_{false} to header
  3. Snapshot contCpuProfile from config_ into the atomic in the config() method (with proper null/type checks)
  4. Use cont_cpu_profile_enabled_.load(std::memory_order_acquire) in the check at line 2110–2115

These changes eliminate the data race by preventing unprotected JSON reads from arbitrary threads.

🤖 Prompt for AI Agents
In agents/grpc/src/grpc_agent.cc around lines 2096–2100 (and the read at
2110–2115), the code performs unsynchronized reads of config_ and uses implicit
atomic conversion; replace the implicit check if (!assets_enabled_) with if
(!assets_enabled_.load(std::memory_order_acquire)), add a new std::atomic<bool>
cont_cpu_profile_enabled_{false} to the header, update the config() writer
(called at line ~1019) to safely snapshot contCpuProfile from config_ into
cont_cpu_profile_enabled_ after doing null/type checks, and change the
arbitrary-thread check at lines 2110–2115 to use
cont_cpu_profile_enabled_.load(std::memory_order_acquire) so there are no
unprotected JSON reads from non-event-loop threads.

const grpcagent::ProfileArgs& args = req.args().profile();
uint64_t thread_id = args.thread_id();
uint64_t duration = args.duration();
Expand Down
1 change: 1 addition & 0 deletions agents/grpc/src/grpc_agent.h
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,7 @@ class GrpcAgent: public std::enable_shared_from_this<GrpcAgent>,
log_exporter_;

// Profiling
std::atomic<bool> assets_enabled_;
nsuv::ns_mutex profile_state_lock_;
ProfileState profile_state_[ProfileType::kNumberOfProfileTypes];
std::atomic<bool> profile_on_exit_;
Expand Down
3 changes: 2 additions & 1 deletion agents/grpc/src/grpc_errors.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
X(ESnapshotDisabled, 500, "Heap Snapshots disabled", 1004) \
X(ENoMemory, 500, "Internal Runtime Error", 1005) \
X(ENotAvailable, 404, "Resource not available", 1006) \
X(ESourceCodeFileError, 500, "Internal Runtime Error", 1007)
X(ESourceCodeFileError, 500, "Internal Runtime Error", 1007) \
X(EAssetsDisabled, 500, "Assets collection disabled", 1008)

namespace node {
namespace nsolid {
Expand Down
78 changes: 48 additions & 30 deletions agents/grpc/src/proto/reconfigure.pb.cc

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

43 changes: 42 additions & 1 deletion agents/grpc/src/proto/reconfigure.pb.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading