forked from envoyproxy/envoy
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathfatal_error_handler.cc
189 lines (156 loc) · 6.13 KB
/
fatal_error_handler.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
#include "common/signal/fatal_error_handler.h"
#include <atomic>
#include <list>
#include "envoy/event/dispatcher.h"
#include "common/common/assert.h"
#include "common/common/macros.h"
#include "common/signal/fatal_action.h"
#include "absl/base/attributes.h"
#include "absl/synchronization/mutex.h"
namespace Envoy {
namespace FatalErrorHandler {
namespace {
// The type of Fatal Actions.
enum class FatalActionType {
Safe,
Unsafe,
};
ABSL_CONST_INIT static absl::Mutex failure_mutex(absl::kConstInit);
// Since we can't grab the failure mutex on fatal error (snagging locks under
// fatal crash causing potential deadlocks) access the handler list as an atomic
// operation, which is async-signal-safe. If the crash handler runs at the same
// time as another thread tries to modify the list, one of them will get the
// list and the other will get nullptr instead. If the crash handler loses the
// race and gets nullptr, it won't run any of the registered error handlers.
using FailureFunctionList = std::list<const FatalErrorHandlerInterface*>;
ABSL_CONST_INIT std::atomic<FailureFunctionList*> fatal_error_handlers{nullptr};
// Use an atomic operation since on fatal error we'll consume the
// fatal_action_manager and don't want to have any locks as they aren't
// async-signal-safe.
ABSL_CONST_INIT std::atomic<FatalAction::FatalActionManager*> fatal_action_manager{nullptr};
ABSL_CONST_INIT std::atomic<int64_t> failure_tid{-1};
// Executes the Fatal Actions provided.
void runFatalActionsInternal(const FatalAction::FatalActionPtrList& actions) {
// Exchange the fatal_error_handlers pointer so other functions cannot
// concurrently access the list.
FailureFunctionList* list = fatal_error_handlers.exchange(nullptr);
if (list == nullptr) {
return;
}
// Get the dispatcher and its tracked object.
for (auto* handler : *list) {
handler->runFatalActionsOnTrackedObject(actions);
}
// Restore the fatal_error_handlers pointer so subsequent calls using the list
// can succeed.
fatal_error_handlers.store(list);
}
// Helper function to run exclusively either safe or unsafe actions depending on
// the provided action_type.
// Returns a FatalAction status corresponding to our attempt to run the
// action_type.
FatalAction::Status runFatalActions(FatalActionType action_type) {
// Check that registerFatalActions has already been called.
FatalAction::FatalActionManager* action_manager = fatal_action_manager.load();
if (action_manager == nullptr) {
return FatalAction::Status::ActionManagerUnset;
}
int64_t my_tid = action_manager->getThreadFactory().currentThreadId().getId();
if (action_type == FatalActionType::Safe) {
// Try to run safe actions
int64_t expected_tid = -1;
if (failure_tid.compare_exchange_strong(expected_tid, my_tid)) {
// Run the actions
runFatalActionsInternal(action_manager->getSafeActions());
return FatalAction::Status::Success;
} else if (expected_tid == my_tid) {
return FatalAction::Status::AlreadyRanOnThisThread;
}
} else {
// Try to run unsafe actions
int64_t failing_tid = failure_tid.load();
ASSERT(failing_tid != -1);
if (my_tid == failing_tid) {
runFatalActionsInternal(action_manager->getUnsafeActions());
return FatalAction::Status::Success;
}
}
return FatalAction::Status::RunningOnAnotherThread;
}
} // namespace
void registerFatalErrorHandler(const FatalErrorHandlerInterface& handler) {
#ifdef ENVOY_OBJECT_TRACE_ON_DUMP
absl::MutexLock l(&failure_mutex);
FailureFunctionList* list = fatal_error_handlers.exchange(nullptr);
if (list == nullptr) {
list = new FailureFunctionList;
}
list->push_back(&handler);
// Store the fatal_error_handlers pointer now that the list is updated.
fatal_error_handlers.store(list);
#else
UNREFERENCED_PARAMETER(handler);
#endif
}
void removeFatalErrorHandler(const FatalErrorHandlerInterface& handler) {
#ifdef ENVOY_OBJECT_TRACE_ON_DUMP
absl::MutexLock l(&failure_mutex);
FailureFunctionList* list = fatal_error_handlers.exchange(nullptr);
if (list == nullptr) {
// removeFatalErrorHandler() may see an empty list of fatal error handlers
// if it's called at the same time as callFatalErrorHandlers(). In that case
// Envoy is in the middle of crashing anyway, but don't add a segfault on
// top of the crash.
return;
}
list->remove(&handler);
if (list->empty()) {
delete list;
} else {
fatal_error_handlers.store(list);
}
#else
UNREFERENCED_PARAMETER(handler);
#endif
}
void callFatalErrorHandlers(std::ostream& os) {
FailureFunctionList* list = fatal_error_handlers.exchange(nullptr);
if (list != nullptr) {
for (const auto* handler : *list) {
handler->onFatalError(os);
}
fatal_error_handlers.store(list);
}
}
void registerFatalActions(FatalAction::FatalActionPtrList safe_actions,
FatalAction::FatalActionPtrList unsafe_actions,
Thread::ThreadFactory& thread_factory) {
// Create a FatalActionManager and store it.
FatalAction::FatalActionManager* previous_manager =
fatal_action_manager.exchange(new FatalAction::FatalActionManager(
std::move(safe_actions), std::move(unsafe_actions), thread_factory));
// Previous manager should be NULL.
ASSERT(!previous_manager);
}
FatalAction::Status runSafeActions() { return runFatalActions(FatalActionType::Safe); }
FatalAction::Status runUnsafeActions() { return runFatalActions(FatalActionType::Unsafe); }
void clearFatalActionsOnTerminate() {
auto* raw_ptr = fatal_action_manager.exchange(nullptr);
if (raw_ptr != nullptr) {
delete raw_ptr;
}
}
// This resets the internal state of Fatal Action for the module.
// This is necessary as it allows us to have multiple test cases invoke the
// fatal actions without state from other tests leaking in.
void resetFatalActionStateForTest() {
// Free the memory of the Fatal Action, since it's not managed by a smart
// pointer. This prevents memory leaks in tests.
auto* raw_ptr = fatal_action_manager.exchange(nullptr);
if (raw_ptr != nullptr) {
delete raw_ptr;
}
failure_tid.store(-1);
}
} // namespace FatalErrorHandler
} // namespace Envoy