Skip to content
4 changes: 2 additions & 2 deletions integrating.md
Original file line number Diff line number Diff line change
Expand Up @@ -248,8 +248,8 @@ Ensure `mongocrypt_setopt_retry_kms` is called on the `mongocrypt_t` to enable r
d. Feed the reply back with `mongocrypt_kms_ctx_feed`. Repeat
> until `mongocrypt_kms_ctx_bytes_needed` returns 0.

If any step encounters a network error, call `mongocrypt_kms_ctx_fail`.
If `mongocrypt_kms_ctx_fail` returns true, continue to the next KMS context.
If any step encounters a network error or if `mongocrypt_kms_ctx_should_retry` returns true after feeding the reply, call `mongocrypt_kms_ctx_fail`.
If `mongocrypt_kms_ctx_fail` returns true, retry the request by continuing to the next KMS context or by feeding the new response into the same context.
If `mongocrypt_kms_ctx_fail` returns false, abort and report an error. Consider wrapping the error reported in `mongocrypt_kms_ctx_status` to include the last network error.

Copy link
Contributor

@kevinAlbs kevinAlbs Aug 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The note below might be confusing, since it mentions fanning out requests with the older retry behavior:

Note, the driver MAY fan out KMS requests in parallel. More KMS requests may be added when processing responses to retry.

Consider adding a section describing retry and iteration. Idea:

Retry and Iteration

Retry behavior is enabled by calling mongocrypt_setopt_retry_kms.

There are two options for retry:

  • Lazy retry: After processing KMS contexts, iterate again by calling mongocrypt_ctx_next_kms_ctx. KMS contexts needing a retry will be returned.
  • In-place retry: If a KMS context indicates retry, retry the KMS request and feed to the response to the same KMS request. Use mongocrypt_kms_ctx_feed_with_retry and check the return of mongocrypt_kms_ctx_fail to check if a retry is indicated.

The driver MAY fan out KMS requests in parallel. It is not safe to iterate KMS contexts (i.e. call mongocrypt_ctx_next_kms_ctx) while operating on KMS contexts (e.g. calling mongocrypt_kms_ctx_feed). Drivers are recommended to do an in-place retry on KMS requests.

2. When done feeding all replies, call `mongocrypt_ctx_kms_done`.
Expand Down
44 changes: 27 additions & 17 deletions src/mongocrypt-kms-ctx.c
Original file line number Diff line number Diff line change
Expand Up @@ -1120,6 +1120,28 @@ static bool _ctx_done_kmip_decrypt(mongocrypt_kms_ctx_t *kms_ctx) {
return ret;
}

static bool _is_retryable_req(_kms_request_type_t req_type) {
// Check if request type is retryable. Some requests are non-idempotent and cannot be safely retried.
_kms_request_type_t retryable_types[] = {MONGOCRYPT_KMS_AZURE_OAUTH,
MONGOCRYPT_KMS_GCP_OAUTH,
MONGOCRYPT_KMS_AWS_ENCRYPT,
MONGOCRYPT_KMS_AWS_DECRYPT,
MONGOCRYPT_KMS_AZURE_WRAPKEY,
MONGOCRYPT_KMS_AZURE_UNWRAPKEY,
MONGOCRYPT_KMS_GCP_ENCRYPT,
MONGOCRYPT_KMS_GCP_DECRYPT};
for (size_t i = 0; i < sizeof(retryable_types) / sizeof(retryable_types[0]); i++) {
if (retryable_types[i] == req_type) {
return true;
}
}
return false;
}

bool mongocrypt_kms_ctx_should_retry(mongocrypt_kms_ctx_t *kms) {
return kms && kms->should_retry;
}

bool mongocrypt_kms_ctx_fail(mongocrypt_kms_ctx_t *kms) {
if (!kms) {
return false;
Expand All @@ -1138,23 +1160,7 @@ bool mongocrypt_kms_ctx_fail(mongocrypt_kms_ctx_t *kms) {
return false;
}

// Check if request type is retryable. Some requests are non-idempotent and cannot be safely retried.
_kms_request_type_t retryable_types[] = {MONGOCRYPT_KMS_AZURE_OAUTH,
MONGOCRYPT_KMS_GCP_OAUTH,
MONGOCRYPT_KMS_AWS_ENCRYPT,
MONGOCRYPT_KMS_AWS_DECRYPT,
MONGOCRYPT_KMS_AZURE_WRAPKEY,
MONGOCRYPT_KMS_AZURE_UNWRAPKEY,
MONGOCRYPT_KMS_GCP_ENCRYPT,
MONGOCRYPT_KMS_GCP_DECRYPT};
bool is_retryable = false;
for (size_t i = 0; i < sizeof(retryable_types) / sizeof(retryable_types[0]); i++) {
if (retryable_types[i] == kms->req_type) {
is_retryable = true;
break;
}
}
if (!is_retryable) {
if (!_is_retryable_req(kms->req_type)) {
CLIENT_ERR("KMS request failed due to network error");
return false;
}
Expand All @@ -1178,6 +1184,10 @@ bool mongocrypt_kms_ctx_feed(mongocrypt_kms_ctx_t *kms, mongocrypt_binary_t *byt
if (!mongocrypt_status_ok(status)) {
return false;
}
if (kms->should_retry) {
// This happens when a KMS context is reused in-place
kms->should_retry = false;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With mongocrypt_kms_ctx_feed_with_retry, I expect it would be a driver bug to call mongocrypt_kms_ctx_feed directly on a KMS context needing retry. Calling mongocrypt_kms_ctx_feed suggests the driver is not doing in-place retry.

Consider making this an error:

if (kms->should_retry) {
     CLIENT_ERR ("KMS context needs retry. Call mongocrypt_kms_ctx_feed_with_retry instead");
     return false;
}

And setting kms->should_retry to false in mongocrypt_kms_ctx_feed_with_retry.


if (!bytes) {
CLIENT_ERR("argument 'bytes' is required");
Expand Down
12 changes: 11 additions & 1 deletion src/mongocrypt.h
Original file line number Diff line number Diff line change
Expand Up @@ -1180,14 +1180,24 @@ MONGOCRYPT_EXPORT
bool mongocrypt_kms_ctx_feed(mongocrypt_kms_ctx_t *kms, mongocrypt_binary_t *bytes);

/**
* Indicate a network-level failure.
* Indicate a failure. Discards all data fed to this KMS context with @ref mongocrypt_kms_ctx_feed.
* The @ref mongocrypt_kms_ctx_t may be reused.
*
* @param[in] kms The @ref mongocrypt_kms_ctx_t.
* @return A boolean indicating whether the failed request may be retried.
*/
MONGOCRYPT_EXPORT
bool mongocrypt_kms_ctx_fail(mongocrypt_kms_ctx_t *kms);

/**
* Indicate if a KMS context is completed but should be retried.
*
* @param[in] kms The @ref mongocrypt_kms_ctx_t.
* @return A boolean indicating whether the failed request should be retried.
*/
MONGOCRYPT_EXPORT
bool mongocrypt_kms_ctx_should_retry(mongocrypt_kms_ctx_t *kms);

/**
* Get the status associated with a @ref mongocrypt_kms_ctx_t object.
*
Expand Down
7 changes: 7 additions & 0 deletions test/data/kms-aws/encrypt-response-partial.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
HTTP/1.1 200 OK
x-amzn-RequestId: deeb35e5-4ecb-4bf1-9af5-84a54ff0af0e
Content-Type: application/x-amz-json-1.1
Content-Length: 446
Connection: close

{"KeyId": "arn:aws:k
51 changes: 51 additions & 0 deletions test/test-mongocrypt-datakey.c
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,33 @@ static void _test_create_datakey_with_retry(_mongocrypt_tester_t *tester) {
mongocrypt_destroy(crypt);
}

// Test that an HTTP error is retried in-place.
{
mongocrypt_t *crypt = _mongocrypt_tester_mongocrypt(TESTER_MONGOCRYPT_DEFAULT);
mongocrypt_ctx_t *ctx = mongocrypt_ctx_new(crypt);
ASSERT_OK(
mongocrypt_ctx_setopt_key_encryption_key(ctx,
TEST_BSON("{'provider': 'aws', 'key': 'foo', 'region': 'bar'}")),
ctx);
ASSERT_OK(mongocrypt_ctx_datakey_init(ctx), ctx);
ASSERT_STATE_EQUAL(mongocrypt_ctx_state(ctx), MONGOCRYPT_CTX_NEED_KMS);
mongocrypt_kms_ctx_t *kms_ctx = mongocrypt_ctx_next_kms_ctx(ctx);
ASSERT_OK(kms_ctx, ctx);
// Expect no sleep is requested before any error.
ASSERT_CMPINT64(mongocrypt_kms_ctx_usleep(kms_ctx), ==, 0);
// Feed a retryable HTTP error.
ASSERT_OK(mongocrypt_kms_ctx_feed(kms_ctx, TEST_FILE("./test/data/rmd/kms-decrypt-reply-429.txt")), kms_ctx);
// In-place retry is indicated.
ASSERT(mongocrypt_kms_ctx_should_retry(kms_ctx));
ASSERT(mongocrypt_kms_ctx_fail(kms_ctx));
// Feed a successful response.
ASSERT_OK(mongocrypt_kms_ctx_feed(kms_ctx, TEST_FILE("./test/data/kms-aws/encrypt-response.txt")), kms_ctx);
ASSERT_OK(mongocrypt_ctx_kms_done(ctx), ctx);
_mongocrypt_tester_run_ctx_to(tester, ctx, MONGOCRYPT_CTX_DONE);
mongocrypt_ctx_destroy(ctx);
mongocrypt_destroy(crypt);
}

// Test that a network error is retried.
{
mongocrypt_t *crypt = _mongocrypt_tester_mongocrypt(TESTER_MONGOCRYPT_DEFAULT);
Expand Down Expand Up @@ -454,6 +481,30 @@ static void _test_create_datakey_with_retry(_mongocrypt_tester_t *tester) {
mongocrypt_destroy(crypt);
}

// Test that a network error is retried in-place.
{
mongocrypt_t *crypt = _mongocrypt_tester_mongocrypt(TESTER_MONGOCRYPT_DEFAULT);
mongocrypt_ctx_t *ctx = mongocrypt_ctx_new(crypt);
ASSERT_OK(
mongocrypt_ctx_setopt_key_encryption_key(ctx,
TEST_BSON("{'provider': 'aws', 'key': 'foo', 'region': 'bar'}")),
ctx);
ASSERT_OK(mongocrypt_ctx_datakey_init(ctx), ctx);
ASSERT_STATE_EQUAL(mongocrypt_ctx_state(ctx), MONGOCRYPT_CTX_NEED_KMS);
mongocrypt_kms_ctx_t *kms_ctx = mongocrypt_ctx_next_kms_ctx(ctx);
ASSERT_OK(kms_ctx, ctx);
// Expect no sleep is requested before any error.
ASSERT_CMPINT64(mongocrypt_kms_ctx_usleep(kms_ctx), ==, 0);
// Mark a network error.
ASSERT_OK(mongocrypt_kms_ctx_fail(kms_ctx), kms_ctx);
// Feed a successful response.
ASSERT_OK(mongocrypt_kms_ctx_feed(kms_ctx, TEST_FILE("./test/data/kms-aws/encrypt-response.txt")), kms_ctx);
ASSERT_OK(mongocrypt_ctx_kms_done(ctx), ctx);
_mongocrypt_tester_run_ctx_to(tester, ctx, MONGOCRYPT_CTX_DONE);
mongocrypt_ctx_destroy(ctx);
mongocrypt_destroy(crypt);
}

// Test that an oauth request is retried for a network error.
{
mongocrypt_t *crypt = _mongocrypt_tester_mongocrypt(TESTER_MONGOCRYPT_DEFAULT);
Expand Down