Skip to content

Commit 7c92028

Browse files
authored
signal: Add hooks for calling fatal error handlers from non-envoy signal handlers (envoyproxy#12062)
Add hooks for calling fatal error handlers from non-Envoy signal handlers. Move register/removeFatalErrorHandler from SignalAction into a new FatalErrorHandler namespace, and add a new function, callFatalErrorHandlers, which runs the registered error handlers. This makes the crash logging from issue envoyproxy#7300 available for builds that don't use ENVOY_HANDLE_SIGNALS, as long as they do use ENVOY_OBJECT_TRACE_ON_DUMP. Risk Level: Low Testing: bazel test //test/... Docs Changes: N/A Release Notes: Added Fixes envoyproxy#11984 Signed-off-by: Michael Behr <[email protected]>
1 parent 8e34f9e commit 7c92028

File tree

11 files changed

+175
-82
lines changed

11 files changed

+175
-82
lines changed

docs/root/version_history/current.rst

+1
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ New Features
6363
* router: added new
6464
:ref:`envoy-ratelimited<config_http_filters_router_retry_policy-envoy-ratelimited>`
6565
retry policy, which allows retrying envoy's own rate limited responses.
66+
* signal: added support for calling fatal error handlers without envoy's signal handler, via FatalErrorHandler::callFatalErrorHandlers().
6667
* stats: added optional histograms to :ref:`cluster stats <config_cluster_manager_cluster_stats_request_response_sizes>`
6768
that track headers and body sizes of requests and responses.
6869
* stats: allow configuring histogram buckets for stats sinks and admin endpoints that support it.

source/common/event/dispatcher_impl.cc

+2-8
Original file line numberDiff line numberDiff line change
@@ -45,19 +45,13 @@ DispatcherImpl::DispatcherImpl(const std::string& name, Buffer::WatermarkFactory
4545
post_cb_(base_scheduler_.createSchedulableCallback([this]() -> void { runPostCallbacks(); })),
4646
current_to_delete_(&to_delete_1_) {
4747
ASSERT(!name_.empty());
48-
#ifdef ENVOY_HANDLE_SIGNALS
49-
SignalAction::registerFatalErrorHandler(*this);
50-
#endif
48+
FatalErrorHandler::registerFatalErrorHandler(*this);
5149
updateApproximateMonotonicTimeInternal();
5250
base_scheduler_.registerOnPrepareCallback(
5351
std::bind(&DispatcherImpl::updateApproximateMonotonicTime, this));
5452
}
5553

56-
DispatcherImpl::~DispatcherImpl() {
57-
#ifdef ENVOY_HANDLE_SIGNALS
58-
SignalAction::removeFatalErrorHandler(*this);
59-
#endif
60-
}
54+
DispatcherImpl::~DispatcherImpl() { FatalErrorHandler::removeFatalErrorHandler(*this); }
6155

6256
void DispatcherImpl::initializeStats(Stats::Scope& scope,
6357
const absl::optional<std::string>& prefix) {

source/common/event/dispatcher_impl.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -80,12 +80,12 @@ class DispatcherImpl : Logger::Loggable<Logger::Id::main>,
8080
void updateApproximateMonotonicTime() override;
8181

8282
// FatalErrorInterface
83-
void onFatalError() const override {
83+
void onFatalError(std::ostream& os) const override {
8484
// Dump the state of the tracked object if it is in the current thread. This generally results
8585
// in dumping the active state only for the thread which caused the fatal error.
8686
if (isThreadSafe()) {
8787
if (current_object_) {
88-
current_object_->dumpState(std::cerr);
88+
current_object_->dumpState(os);
8989
}
9090
}
9191
}

source/common/signal/BUILD

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ envoy_package()
1010

1111
envoy_cc_library(
1212
name = "fatal_error_handler_lib",
13+
srcs = ["fatal_error_handler.cc"],
1314
hdrs = ["fatal_error_handler.h"],
1415
)
1516

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
#include "common/signal/fatal_error_handler.h"
2+
3+
#include <list>
4+
5+
#include "absl/base/attributes.h"
6+
#include "absl/synchronization/mutex.h"
7+
8+
namespace Envoy {
9+
namespace FatalErrorHandler {
10+
11+
namespace {
12+
13+
ABSL_CONST_INIT static absl::Mutex failure_mutex(absl::kConstInit);
14+
// Since we can't grab the failure mutex on fatal error (snagging locks under
15+
// fatal crash causing potential deadlocks) access the handler list as an atomic
16+
// operation, which is async-signal-safe. If the crash handler runs at the same
17+
// time as another thread tries to modify the list, one of them will get the
18+
// list and the other will get nullptr instead. If the crash handler loses the
19+
// race and gets nullptr, it won't run any of the registered error handlers.
20+
using FailureFunctionList = std::list<const FatalErrorHandlerInterface*>;
21+
ABSL_CONST_INIT std::atomic<FailureFunctionList*> fatal_error_handlers{nullptr};
22+
23+
} // namespace
24+
25+
void registerFatalErrorHandler(const FatalErrorHandlerInterface& handler) {
26+
#ifdef ENVOY_OBJECT_TRACE_ON_DUMP
27+
absl::MutexLock l(&failure_mutex);
28+
FailureFunctionList* list = fatal_error_handlers.exchange(nullptr, std::memory_order_relaxed);
29+
if (list == nullptr) {
30+
list = new FailureFunctionList;
31+
}
32+
list->push_back(&handler);
33+
fatal_error_handlers.store(list, std::memory_order_release);
34+
#else
35+
UNREFERENCED_PARAMETER(handler);
36+
#endif
37+
}
38+
39+
void removeFatalErrorHandler(const FatalErrorHandlerInterface& handler) {
40+
#ifdef ENVOY_OBJECT_TRACE_ON_DUMP
41+
absl::MutexLock l(&failure_mutex);
42+
FailureFunctionList* list = fatal_error_handlers.exchange(nullptr, std::memory_order_relaxed);
43+
if (list == nullptr) {
44+
// removeFatalErrorHandler() may see an empty list of fatal error handlers
45+
// if it's called at the same time as callFatalErrorHandlers(). In that case
46+
// Envoy is in the middle of crashing anyway, but don't add a segfault on
47+
// top of the crash.
48+
return;
49+
}
50+
list->remove(&handler);
51+
if (list->empty()) {
52+
delete list;
53+
} else {
54+
fatal_error_handlers.store(list, std::memory_order_release);
55+
}
56+
#else
57+
UNREFERENCED_PARAMETER(handler);
58+
#endif
59+
}
60+
61+
void callFatalErrorHandlers(std::ostream& os) {
62+
FailureFunctionList* list = fatal_error_handlers.exchange(nullptr, std::memory_order_relaxed);
63+
if (list != nullptr) {
64+
for (const auto* handler : *list) {
65+
handler->onFatalError(os);
66+
}
67+
delete list;
68+
}
69+
}
70+
71+
} // namespace FatalErrorHandler
72+
} // namespace Envoy
+26-5
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,39 @@
11
#pragma once
22

3+
#include <ostream>
4+
35
#include "envoy/common/pure.h"
46

57
namespace Envoy {
68

79
// A simple class which allows registering functions to be called when Envoy
8-
// receives one of the fatal signals, documented below.
9-
//
10-
// This is split out of signal_action.h because it is exempted from various
11-
// builds.
10+
// receives one of the fatal signals, documented in signal_action.h.
1211
class FatalErrorHandlerInterface {
1312
public:
1413
virtual ~FatalErrorHandlerInterface() = default;
15-
virtual void onFatalError() const PURE;
14+
// Called when Envoy receives a fatal signal. Must be async-signal-safe: in
15+
// particular, it can't allocate memory.
16+
virtual void onFatalError(std::ostream& os) const PURE;
1617
};
1718

19+
namespace FatalErrorHandler {
20+
/**
21+
* Add this handler to the list of functions which will be called if Envoy
22+
* receives a fatal signal.
23+
*/
24+
void registerFatalErrorHandler(const FatalErrorHandlerInterface& handler);
25+
26+
/**
27+
* Removes this handler from the list of functions which will be called if Envoy
28+
* receives a fatal signal.
29+
*/
30+
void removeFatalErrorHandler(const FatalErrorHandlerInterface& handler);
31+
32+
/**
33+
* Calls and unregisters the fatal error handlers registered with
34+
* registerFatalErrorHandler. This is async-signal-safe and intended to be
35+
* called from a fatal signal handler.
36+
*/
37+
void callFatalErrorHandlers(std::ostream& os);
38+
} // namespace FatalErrorHandler
1839
} // namespace Envoy

source/common/signal/signal_action.cc

+2-47
Original file line numberDiff line numberDiff line change
@@ -9,46 +9,6 @@
99

1010
namespace Envoy {
1111

12-
ABSL_CONST_INIT static absl::Mutex failure_mutex(absl::kConstInit);
13-
// Since we can't grab the failure mutex on fatal error (snagging locks under
14-
// fatal crash causing potential deadlocks) access the handler list as an atomic
15-
// operation, to minimize the chance that one thread is operating on the list
16-
// while the crash handler is attempting to access it.
17-
// This basically makes edits to the list thread-safe - if one thread is
18-
// modifying the list rather than crashing in the crash handler due to accessing
19-
// the list in a non-thread-safe manner, it simply won't log crash traces.
20-
using FailureFunctionList = std::list<const FatalErrorHandlerInterface*>;
21-
ABSL_CONST_INIT std::atomic<FailureFunctionList*> fatal_error_handlers{nullptr};
22-
23-
void SignalAction::registerFatalErrorHandler(const FatalErrorHandlerInterface& handler) {
24-
#ifdef ENVOY_OBJECT_TRACE_ON_DUMP
25-
absl::MutexLock l(&failure_mutex);
26-
FailureFunctionList* list = fatal_error_handlers.exchange(nullptr, std::memory_order_relaxed);
27-
if (list == nullptr) {
28-
list = new FailureFunctionList;
29-
}
30-
list->push_back(&handler);
31-
fatal_error_handlers.store(list, std::memory_order_release);
32-
#else
33-
UNREFERENCED_PARAMETER(handler);
34-
#endif
35-
}
36-
37-
void SignalAction::removeFatalErrorHandler(const FatalErrorHandlerInterface& handler) {
38-
#ifdef ENVOY_OBJECT_TRACE_ON_DUMP
39-
absl::MutexLock l(&failure_mutex);
40-
FailureFunctionList* list = fatal_error_handlers.exchange(nullptr, std::memory_order_relaxed);
41-
list->remove(&handler);
42-
if (list->empty()) {
43-
delete list;
44-
} else {
45-
fatal_error_handlers.store(list, std::memory_order_release);
46-
}
47-
#else
48-
UNREFERENCED_PARAMETER(handler);
49-
#endif
50-
}
51-
5212
constexpr int SignalAction::FATAL_SIGS[];
5313

5414
void SignalAction::sigHandler(int sig, siginfo_t* info, void* context) {
@@ -62,13 +22,8 @@ void SignalAction::sigHandler(int sig, siginfo_t* info, void* context) {
6222
}
6323
tracer.logTrace();
6424

65-
FailureFunctionList* list = fatal_error_handlers.exchange(nullptr, std::memory_order_relaxed);
66-
if (list) {
67-
// Finally after logging the stack trace, call any registered crash handlers.
68-
for (const auto* handler : *list) {
69-
handler->onFatalError();
70-
}
71-
}
25+
// Finally after logging the stack trace, call any registered crash handlers.
26+
FatalErrorHandler::callFatalErrorHandlers(std::cerr);
7227

7328
signal(sig, SIG_DFL);
7429
raise(sig);

source/common/signal/signal_action.h

-13
Original file line numberDiff line numberDiff line change
@@ -73,18 +73,6 @@ class SignalAction : NonCopyable {
7373
*/
7474
static void sigHandler(int sig, siginfo_t* info, void* context);
7575

76-
/**
77-
* Add this handler to the list of functions which will be called if Envoy
78-
* receives a fatal signal.
79-
*/
80-
static void registerFatalErrorHandler(const FatalErrorHandlerInterface& handler);
81-
82-
/**
83-
* Removes this handler from the list of functions which will be called if Envoy
84-
* receives a fatal signal.
85-
*/
86-
static void removeFatalErrorHandler(const FatalErrorHandlerInterface& handler);
87-
8876
private:
8977
/**
9078
* Allocate this many bytes on each side of the area used for alt stack.
@@ -142,7 +130,6 @@ class SignalAction : NonCopyable {
142130
char* altstack_{};
143131
std::array<struct sigaction, sizeof(FATAL_SIGS) / sizeof(int)> previous_handlers_;
144132
stack_t previous_altstack_;
145-
std::list<const FatalErrorHandlerInterface*> fatal_error_handlers_;
146133
};
147134

148135
} // namespace Envoy

test/common/event/dispatcher_impl_test.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -375,7 +375,7 @@ TEST_F(DispatcherImplTest, TimerWithScope) {
375375
timer = dispatcher_->createTimer([this]() {
376376
{
377377
Thread::LockGuard lock(mu_);
378-
static_cast<DispatcherImpl*>(dispatcher_.get())->onFatalError();
378+
static_cast<DispatcherImpl*>(dispatcher_.get())->onFatalError(std::cerr);
379379
work_finished_ = true;
380380
}
381381
cv_.notifyOne();

test/common/signal/BUILD

+2
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@ envoy_cc_test(
1717
"skip_on_windows",
1818
],
1919
deps = [
20+
"//source/common/signal:fatal_error_handler_lib",
2021
"//source/common/signal:sigaction_lib",
22+
"//test/common/stats:stat_test_utility_lib",
2123
"//test/test_common:utility_lib",
2224
],
2325
)

test/common/signal/signals_test.cc

+66-6
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,10 @@
22

33
#include <csignal>
44

5+
#include "common/signal/fatal_error_handler.h"
56
#include "common/signal/signal_action.h"
67

8+
#include "test/common/stats/stat_test_utility.h"
79
#include "test/test_common/utility.h"
810

911
#include "gtest/gtest.h"
@@ -19,6 +21,12 @@ namespace Envoy {
1921
#define ASANITIZED /* Sanitized by GCC */
2022
#endif
2123

24+
// Use this test handler instead of a mock, because fatal error handlers must be
25+
// signal-safe and a mock might allocate memory.
26+
class TestFatalErrorHandler : public FatalErrorHandlerInterface {
27+
void onFatalError(std::ostream& os) const override { os << "HERE!"; }
28+
};
29+
2230
// Death tests that expect a particular output are disabled under address sanitizer.
2331
// The sanitizer does its own special signal handling and prints messages that are
2432
// not ours instead of what this test expects. As of latest Clang this appears
@@ -35,13 +43,9 @@ TEST(SignalsDeathTest, InvalidAddressDeathTest) {
3543
"backtrace.*Segmentation fault");
3644
}
3745

38-
class TestFatalErrorHandler : public FatalErrorHandlerInterface {
39-
void onFatalError() const override { std::cerr << "HERE!"; }
40-
};
41-
4246
TEST(SignalsDeathTest, RegisteredHandlerTest) {
4347
TestFatalErrorHandler handler;
44-
SignalAction::registerFatalErrorHandler(handler);
48+
FatalErrorHandler::registerFatalErrorHandler(handler);
4549
SignalAction actions;
4650
// Make sure the fatal error log "HERE" registered above is logged on fatal error.
4751
EXPECT_DEATH(
@@ -51,7 +55,7 @@ TEST(SignalsDeathTest, RegisteredHandlerTest) {
5155
*(nasty_ptr) = 0; // NOLINT(clang-analyzer-core.NullDereference)
5256
}(),
5357
"HERE");
54-
SignalAction::removeFatalErrorHandler(handler);
58+
FatalErrorHandler::removeFatalErrorHandler(handler);
5559
}
5660

5761
TEST(SignalsDeathTest, BusDeathTest) {
@@ -145,4 +149,60 @@ TEST(Signals, HandlerTest) {
145149
SignalAction::sigHandler(SIGURG, &fake_si, nullptr);
146150
}
147151

152+
TEST(FatalErrorHandler, CallHandler) {
153+
// Reserve space in advance so that the handler doesn't allocate memory.
154+
std::string s;
155+
s.reserve(1024);
156+
std::ostringstream os(std::move(s));
157+
158+
TestFatalErrorHandler handler;
159+
FatalErrorHandler::registerFatalErrorHandler(handler);
160+
161+
FatalErrorHandler::callFatalErrorHandlers(os);
162+
EXPECT_EQ(os.str(), "HERE!");
163+
164+
// callFatalErrorHandlers() will unregister the handler, so this isn't
165+
// necessary for cleanup. Call it anyway, to simulate the case when one thread
166+
// tries to remove the handler while another thread crashes.
167+
FatalErrorHandler::removeFatalErrorHandler(handler);
168+
}
169+
170+
// Use this specialized test handler instead of a mock, because fatal error
171+
// handlers must be signal-safe and a mock might allocate memory.
172+
class MemoryCheckingFatalErrorHandler : public FatalErrorHandlerInterface {
173+
public:
174+
MemoryCheckingFatalErrorHandler(const Stats::TestUtil::MemoryTest& memory_test,
175+
uint64_t& allocated_after_call)
176+
: memory_test_(memory_test), allocated_after_call_(allocated_after_call) {}
177+
void onFatalError(std::ostream& os) const override {
178+
UNREFERENCED_PARAMETER(os);
179+
allocated_after_call_ = memory_test_.consumedBytes();
180+
}
181+
182+
private:
183+
const Stats::TestUtil::MemoryTest& memory_test_;
184+
uint64_t& allocated_after_call_;
185+
};
186+
187+
// FatalErrorHandler::callFatalErrorHandlers shouldn't allocate any heap memory,
188+
// so that it's safe to call from a signal handler. Test by comparing the
189+
// allocated memory before a call with the allocated memory during a handler.
190+
TEST(FatalErrorHandler, DontAllocateMemory) {
191+
// Reserve space in advance so that the handler doesn't allocate memory.
192+
std::string s;
193+
s.reserve(1024);
194+
std::ostringstream os(std::move(s));
195+
196+
Stats::TestUtil::MemoryTest memory_test;
197+
198+
uint64_t allocated_after_call;
199+
MemoryCheckingFatalErrorHandler handler(memory_test, allocated_after_call);
200+
FatalErrorHandler::registerFatalErrorHandler(handler);
201+
202+
uint64_t allocated_before_call = memory_test.consumedBytes();
203+
FatalErrorHandler::callFatalErrorHandlers(os);
204+
205+
EXPECT_MEMORY_EQ(allocated_after_call, allocated_before_call);
206+
}
207+
148208
} // namespace Envoy

0 commit comments

Comments
 (0)