Skip to content

Commit 4e74ab1

Browse files
TingDaoK and graebm authored
Fix websocket shutdown behavior (#483)
The bug was introduced in [PR #474](https://github.com/awslabs/aws-c-http/pull/474/files#diff-ee776c7576cfff50a64158d59a6173ab9a0aa373150574aa9987b4f8726b58e3) - `is_writing_stopped = true` shouldn't be set directly; there's a helper function `s_stop_writing()` that ensures subsequent calls to `aws_websocket_send_frame()` will fail. Let's take a whole new approach to these channel-shutdown-window-deadlock issues: - Add `s_stop_reading_and_dont_block_shutdown()` function that sets `is_reading_stopped = true`, but also increments the read window so that channel shutdown won't deadlock. - Most places that were setting `is_reading_stopped = true` now use this helper instead. - Revamp how `aws_channel_shutdown()` is called. Lots of channel behavior has changed since [this websocket code was written](#48). - If on the channel-thread, just call `aws_channel_shutdown()` - now that [aws_channel_shutdown()](awslabs/aws-c-io#172) is always async, we don't need to defensively schedule a task to call it. - If off-thread, use `s_schedule_channel_shutdown_from_offthread()` - now that this is only called from `aws_websocket_close()`, or when the refcount goes to zero, we can assume the user is OK if reading stops, and it can call `s_stop_reading_and_dont_block_shutdown()` on the way to shutting down. - Add a test to verify that sending a frame after close fails. Co-authored-by: Michael Graeb <[email protected]>
1 parent 7db2452 commit 4e74ab1

File tree

3 files changed

+94
-64
lines changed

3 files changed

+94
-64
lines changed

source/websocket.c

Lines changed: 61 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ struct aws_websocket {
4343
aws_websocket_on_incoming_frame_complete_fn *on_incoming_frame_complete;
4444

4545
struct aws_channel_task move_synced_data_to_thread_task;
46-
struct aws_channel_task shutdown_channel_task;
46+
struct aws_channel_task shutdown_channel_from_offthread_task;
4747
struct aws_channel_task increment_read_window_task;
4848
struct aws_channel_task waiting_on_payload_stream_task;
4949
struct aws_channel_task close_timeout_task;
@@ -85,7 +85,10 @@ struct aws_websocket {
8585
/* True when no more frames will be read, due to:
8686
* - a CLOSE frame was received
8787
* - decoder error
88-
* - channel shutdown in read-dir */
88+
* - channel shutdown in read-dir
89+
* - user calling aws_websocket_close()
90+
* - user dropping the last refcount
91+
*/
8992
bool is_reading_stopped;
9093

9194
/* True when no more frames will be written, due to:
@@ -124,9 +127,9 @@ struct aws_websocket {
124127
/* Error-code returned by aws_websocket_send_frame() when is_writing_stopped is true */
125128
int send_frame_error_code;
126129

127-
/* Use a task to issue a channel shutdown. */
128-
int shutdown_channel_task_error_code;
129-
bool is_shutdown_channel_task_scheduled;
130+
/* Use a task to issue a channel shutdown from off-thread. */
131+
int shutdown_channel_from_offthread_task_error_code;
132+
bool is_shutdown_channel_from_offthread_task_scheduled;
130133

131134
bool is_move_synced_data_to_thread_task_scheduled;
132135

@@ -186,10 +189,13 @@ static bool s_midchannel_send_payload(struct aws_websocket *websocket, struct aw
186189
static void s_midchannel_send_complete(struct aws_websocket *websocket, int error_code, void *user_data);
187190
static void s_move_synced_data_to_thread_task(struct aws_channel_task *task, void *arg, enum aws_task_status status);
188191
static void s_increment_read_window_task(struct aws_channel_task *task, void *arg, enum aws_task_status status);
189-
static void s_shutdown_channel_task(struct aws_channel_task *task, void *arg, enum aws_task_status status);
192+
static void s_shutdown_channel_from_offthread_task(
193+
struct aws_channel_task *task,
194+
void *arg,
195+
enum aws_task_status status);
190196
static void s_waiting_on_payload_stream_task(struct aws_channel_task *task, void *arg, enum aws_task_status status);
191197
static void s_close_timeout_task(struct aws_channel_task *task, void *arg, enum aws_task_status status);
192-
static void s_schedule_channel_shutdown(struct aws_websocket *websocket, int error_code);
198+
static void s_schedule_channel_shutdown_from_offthread(struct aws_websocket *websocket, int error_code);
193199
static void s_shutdown_due_to_write_err(struct aws_websocket *websocket, int error_code);
194200
static void s_shutdown_due_to_read_err(struct aws_websocket *websocket, int error_code);
195201
static void s_stop_writing(struct aws_websocket *websocket, int send_frame_error_code);
@@ -285,7 +291,10 @@ struct aws_websocket *aws_websocket_handler_new(const struct aws_websocket_handl
285291
websocket,
286292
"websocket_move_synced_data_to_thread");
287293
aws_channel_task_init(
288-
&websocket->shutdown_channel_task, s_shutdown_channel_task, websocket, "websocket_shutdown_channel");
294+
&websocket->shutdown_channel_from_offthread_task,
295+
s_shutdown_channel_from_offthread_task,
296+
websocket,
297+
"websocket_shutdown_channel");
289298
aws_channel_task_init(
290299
&websocket->increment_read_window_task,
291300
s_increment_read_window_task,
@@ -377,7 +386,7 @@ static void s_websocket_on_refcount_zero(void *user_data) {
377386
AWS_LS_HTTP_WEBSOCKET, "id=%p: Websocket ref-count is zero, shut down if necessary.", (void *)websocket);
378387

379388
/* Channel might already be shut down, but make sure */
380-
s_schedule_channel_shutdown(websocket, AWS_ERROR_SUCCESS);
389+
s_schedule_channel_shutdown_from_offthread(websocket, AWS_ERROR_SUCCESS);
381390

382391
/* Channel won't destroy its slots/handlers until its refcount reaches 0 */
383392
aws_channel_release_hold(websocket->channel_slot->channel);
@@ -897,6 +906,21 @@ static void s_complete_frame_list(struct aws_websocket *websocket, struct aws_li
897906
aws_linked_list_init(frames);
898907
}
899908

909+
/* Set is_reading_stopped = true, all further read data will be ignored.
910+
* But also increment the read window, so that channel shutdown won't deadlock
911+
* due to pending read-data in an upstream handler or the underlying OS socket. */
912+
static void s_stop_reading_and_dont_block_shutdown(struct aws_websocket *websocket) {
913+
AWS_ASSERT(aws_channel_thread_is_callers_thread(websocket->channel_slot->channel));
914+
if (websocket->thread_data.is_reading_stopped) {
915+
return;
916+
}
917+
918+
AWS_LOGF_TRACE(AWS_LS_HTTP_WEBSOCKET, "id=%p: Websocket will ignore any further read data.", (void *)websocket);
919+
websocket->thread_data.is_reading_stopped = true;
920+
921+
aws_channel_slot_increment_read_window(websocket->channel_slot, SIZE_MAX);
922+
}
923+
900924
static void s_stop_writing(struct aws_websocket *websocket, int send_frame_error_code) {
901925
AWS_ASSERT(aws_channel_thread_is_callers_thread(websocket->channel_slot->channel));
902926
AWS_ASSERT(send_frame_error_code != AWS_ERROR_SUCCESS);
@@ -947,7 +971,7 @@ static void s_shutdown_due_to_write_err(struct aws_websocket *websocket, int err
947971
(void *)websocket,
948972
error_code,
949973
aws_error_name(error_code));
950-
s_schedule_channel_shutdown(websocket, error_code);
974+
aws_channel_shutdown(websocket->channel_slot->channel, error_code);
951975
}
952976
}
953977

@@ -961,18 +985,22 @@ static void s_shutdown_due_to_read_err(struct aws_websocket *websocket, int erro
961985
error_code,
962986
aws_error_name(error_code));
963987

964-
websocket->thread_data.is_reading_stopped = true;
988+
s_stop_reading_and_dont_block_shutdown(websocket);
965989

966990
/* If there's a current incoming frame, complete it with the specific error code. */
967991
if (websocket->thread_data.current_incoming_frame) {
968992
s_complete_incoming_frame(websocket, error_code, NULL);
969993
}
970994

971995
/* Tell channel to shutdown (it's ok to call this redundantly) */
972-
s_schedule_channel_shutdown(websocket, error_code);
996+
aws_channel_shutdown(websocket->channel_slot->channel, error_code);
973997
}
974998

975-
static void s_shutdown_channel_task(struct aws_channel_task *task, void *arg, enum aws_task_status status) {
999+
static void s_shutdown_channel_from_offthread_task(
1000+
struct aws_channel_task *task,
1001+
void *arg,
1002+
enum aws_task_status status) {
1003+
9761004
(void)task;
9771005

9781006
if (status != AWS_TASK_STATUS_RUN_READY) {
@@ -985,39 +1013,39 @@ static void s_shutdown_channel_task(struct aws_channel_task *task, void *arg, en
9851013
/* BEGIN CRITICAL SECTION */
9861014
s_lock_synced_data(websocket);
9871015

988-
error_code = websocket->synced_data.shutdown_channel_task_error_code;
1016+
error_code = websocket->synced_data.shutdown_channel_from_offthread_task_error_code;
9891017

9901018
s_unlock_synced_data(websocket);
9911019
/* END CRITICAL SECTION */
992-
websocket->thread_data.is_reading_stopped = true;
993-
websocket->thread_data.is_writing_stopped = true;
1020+
1021+
/* Stop reading, so that shutdown won't be blocked.
1022+
* If something off-thread is causing shutdown (aws_websocket_close(), refcount 0, etc),
1023+
* the user may never interact with the websocket again. We can't rely on them
1024+
* to keep the window open and prevent deadlock during shutdown. */
1025+
s_stop_reading_and_dont_block_shutdown(websocket);
9941026

9951027
aws_channel_shutdown(websocket->channel_slot->channel, error_code);
996-
/* Increase the window size after shutdown starts, to prevent deadlock when data still pending in the upstream
997-
* handler. */
998-
aws_channel_slot_increment_read_window(websocket->channel_slot, SIZE_MAX);
9991028
}
10001029

1001-
/* Tell the channel to shut down. It is safe to call this multiple times.
1002-
* The call to aws_channel_shutdown() is delayed so that a user invoking aws_websocket_close doesn't
1003-
* have completion callbacks firing before the function call even returns */
1004-
static void s_schedule_channel_shutdown(struct aws_websocket *websocket, int error_code) {
1030+
/* Tell the channel to shut down, from off-thread. It is safe to call this multiple times. */
1031+
static void s_schedule_channel_shutdown_from_offthread(struct aws_websocket *websocket, int error_code) {
10051032
bool schedule_shutdown = false;
10061033

10071034
/* BEGIN CRITICAL SECTION */
10081035
s_lock_synced_data(websocket);
10091036

1010-
if (!websocket->synced_data.is_shutdown_channel_task_scheduled) {
1037+
if (!websocket->synced_data.is_shutdown_channel_from_offthread_task_scheduled) {
10111038
schedule_shutdown = true;
1012-
websocket->synced_data.is_shutdown_channel_task_scheduled = true;
1013-
websocket->synced_data.shutdown_channel_task_error_code = error_code;
1039+
websocket->synced_data.is_shutdown_channel_from_offthread_task_scheduled = true;
1040+
websocket->synced_data.shutdown_channel_from_offthread_task_error_code = error_code;
10141041
}
10151042

10161043
s_unlock_synced_data(websocket);
10171044
/* END CRITICAL SECTION */
10181045

10191046
if (schedule_shutdown) {
1020-
aws_channel_schedule_task_now(websocket->channel_slot->channel, &websocket->shutdown_channel_task);
1047+
aws_channel_schedule_task_now(
1048+
websocket->channel_slot->channel, &websocket->shutdown_channel_from_offthread_task);
10211049
}
10221050
}
10231051

@@ -1038,14 +1066,13 @@ void aws_websocket_close(struct aws_websocket *websocket, bool free_scarce_resou
10381066
return;
10391067
}
10401068

1041-
/* TODO: aws_channel_shutdown() should let users specify error_code and "immediate" as separate parameters.
1042-
* Currently, any non-zero error_code results in "immediate" shutdown */
1069+
/* TODO: aws_channel_shutdown() should let users specify error_code and "immediate" as separate parameters. */
10431070
int error_code = AWS_ERROR_SUCCESS;
10441071
if (free_scarce_resources_immediately) {
10451072
error_code = AWS_ERROR_HTTP_CONNECTION_CLOSED;
10461073
}
10471074

1048-
s_schedule_channel_shutdown(websocket, error_code);
1075+
s_schedule_channel_shutdown_from_offthread(websocket, error_code);
10491076
}
10501077

10511078
static int s_handler_shutdown(
@@ -1255,17 +1282,7 @@ static int s_handler_process_read_message(
12551282
}
12561283

12571284
if (websocket->thread_data.incoming_message_window_update > 0) {
1258-
err = aws_channel_slot_increment_read_window(slot, websocket->thread_data.incoming_message_window_update);
1259-
if (err) {
1260-
AWS_LOGF_ERROR(
1261-
AWS_LS_HTTP_WEBSOCKET,
1262-
"id=%p: Failed to increment read window after message processing, error %d (%s). Closing "
1263-
"connection.",
1264-
(void *)websocket,
1265-
aws_last_error(),
1266-
aws_error_name(aws_last_error()));
1267-
goto error;
1268-
}
1285+
aws_channel_slot_increment_read_window(slot, websocket->thread_data.incoming_message_window_update);
12691286
}
12701287

12711288
goto clean_up;
@@ -1508,7 +1525,7 @@ static void s_complete_incoming_frame(struct aws_websocket *websocket, int error
15081525
AWS_LS_HTTP_WEBSOCKET,
15091526
"id=%p: Close frame received, any further data received will be ignored.",
15101527
(void *)websocket);
1511-
websocket->thread_data.is_reading_stopped = true;
1528+
s_stop_reading_and_dont_block_shutdown(websocket);
15121529

15131530
/* TODO: auto-close if there's a channel-handler to the right */
15141531

@@ -1598,37 +1615,17 @@ static int s_handler_increment_read_window(
15981615
}
15991616

16001617
if (increment != 0) {
1601-
int err = aws_channel_slot_increment_read_window(slot, increment);
1602-
if (err) {
1603-
goto error;
1604-
}
1618+
aws_channel_slot_increment_read_window(slot, increment);
16051619
}
16061620

16071621
return AWS_OP_SUCCESS;
16081622

16091623
error:
1610-
websocket->thread_data.is_reading_stopped = true;
16111624
/* Shutting down channel because I know that no one ever checks these errors */
16121625
s_shutdown_due_to_read_err(websocket, aws_last_error());
16131626
return AWS_OP_ERR;
16141627
}
16151628

1616-
static void s_increment_read_window_action(struct aws_websocket *websocket, size_t size) {
1617-
AWS_ASSERT(aws_channel_thread_is_callers_thread(websocket->channel_slot->channel));
1618-
1619-
int err = aws_channel_slot_increment_read_window(websocket->channel_slot, size);
1620-
if (err) {
1621-
AWS_LOGF_ERROR(
1622-
AWS_LS_HTTP_WEBSOCKET,
1623-
"id=%p: Failed to increment read window, error %d (%s). Closing websocket.",
1624-
(void *)websocket,
1625-
aws_last_error(),
1626-
aws_error_name(aws_last_error()));
1627-
1628-
s_schedule_channel_shutdown(websocket, aws_last_error());
1629-
}
1630-
}
1631-
16321629
static void s_increment_read_window_task(struct aws_channel_task *task, void *arg, enum aws_task_status status) {
16331630
(void)task;
16341631

@@ -1651,7 +1648,7 @@ static void s_increment_read_window_task(struct aws_channel_task *task, void *ar
16511648
AWS_LOGF_TRACE(
16521649
AWS_LS_HTTP_WEBSOCKET, "id=%p: Running task to increment read window by %zu.", (void *)websocket, size);
16531650

1654-
s_increment_read_window_action(websocket, size);
1651+
aws_channel_slot_increment_read_window(websocket->channel_slot, size);
16551652
}
16561653

16571654
void aws_websocket_increment_read_window(struct aws_websocket *websocket, size_t size) {

tests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ add_test_case(websocket_handler_window_manual_increment)
226226
add_test_case(websocket_handler_window_manual_increment_off_thread)
227227
add_test_case(websocket_handler_sends_pong_automatically)
228228
add_test_case(websocket_handler_wont_send_pong_after_close_frame)
229+
add_test_case(websocket_handler_send_frame_fails_if_websocket_closed)
229230
add_test_case(websocket_midchannel_sanity_check)
230231
add_test_case(websocket_midchannel_write_message)
231232
add_test_case(websocket_midchannel_write_multiple_messages)

tests/test_websocket_handler.c

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1883,6 +1883,38 @@ TEST_CASE(websocket_handler_wont_send_pong_after_close_frame) {
18831883
return AWS_OP_SUCCESS;
18841884
}
18851885

1886+
/* This is a regression test. If aws_websocket_close() leads to shutdown,
1887+
* then subsequent calls to aws_websocket_send_frame() should fail. */
1888+
TEST_CASE(websocket_handler_send_frame_fails_if_websocket_closed) {
1889+
(void)ctx;
1890+
(void)ctx;
1891+
struct tester tester;
1892+
ASSERT_SUCCESS(s_tester_init(&tester, allocator));
1893+
1894+
/* Call aws_websocket_close() and wait for shutdown to complete */
1895+
testing_channel_set_is_on_users_thread(&tester.testing_channel, false);
1896+
aws_websocket_close(tester.websocket, false);
1897+
testing_channel_set_is_on_users_thread(&tester.testing_channel, true);
1898+
1899+
ASSERT_SUCCESS(s_drain_written_messages(&tester));
1900+
ASSERT_TRUE(testing_channel_is_shutdown_completed(&tester.testing_channel));
1901+
1902+
/* aws_websocket_send_frame() should fail */
1903+
struct aws_byte_cursor payload = aws_byte_cursor_from_c_str("bitter butter.");
1904+
struct send_tester send = {
1905+
.payload = payload,
1906+
.def =
1907+
{
1908+
.opcode = AWS_WEBSOCKET_OPCODE_PING,
1909+
.fin = true,
1910+
},
1911+
};
1912+
ASSERT_FAILS(s_send_frame(&tester, &send));
1913+
ASSERT_UINT_EQUALS(AWS_ERROR_HTTP_WEBSOCKET_CLOSE_FRAME_SENT, aws_last_error());
1914+
ASSERT_SUCCESS(s_tester_clean_up(&tester));
1915+
return AWS_OP_SUCCESS;
1916+
}
1917+
18861918
TEST_CASE(websocket_midchannel_read_message) {
18871919
(void)ctx;
18881920
struct tester tester;

0 commit comments

Comments
 (0)