-
Notifications
You must be signed in to change notification settings - Fork 713
coordinated compaction improvements #29486
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -62,20 +62,21 @@ compaction_coordinator::compaction_coordinator( | |||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| void compaction_coordinator::on_leadership_change( | ||||||||||||||
| std::optional<vnode> new_leader_id) { | ||||||||||||||
| std::optional<vnode> new_leader_id, model::term_id new_term) { | ||||||||||||||
| cancel_timer(); | ||||||||||||||
| bool new_is_leader = (new_leader_id && *new_leader_id == _self); | ||||||||||||||
| if (new_is_leader != _is_leader) { | ||||||||||||||
| _is_leader = new_is_leader; | ||||||||||||||
| if (_is_leader) { | ||||||||||||||
| arm_timer_if_needed(true); | ||||||||||||||
| } else { | ||||||||||||||
| cancel_timer(); | ||||||||||||||
| } | ||||||||||||||
| _need_force_update = new_is_leader | ||||||||||||||
| && (!_leader_term_id || new_term > *_leader_term_id); | ||||||||||||||
| if (new_is_leader) { | ||||||||||||||
| _leader_term_id = {new_term}; | ||||||||||||||
| arm_timer_if_needed(true); | ||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is there a risk of arming the timer twice if the same replica is elected leader back to back? |
||||||||||||||
| } else { | ||||||||||||||
| _leader_term_id = std::nullopt; | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| void compaction_coordinator::on_group_configuration_change() { | ||||||||||||||
| if (_started && _is_leader) { | ||||||||||||||
| if (_started && _leader_term_id) { | ||||||||||||||
| recalculate_group_offsets(); | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
|
|
@@ -124,7 +125,7 @@ compaction_coordinator::do_distribute_group_offsets( | |||||||||||||
| // We may be a follower lagging behind, so our MCCO or even max offset | ||||||||||||||
| // may be below MTRO. Fix that: any log below MTRO, even not replicated | ||||||||||||||
| // yet, is cleanly compacted. Same for MXFO/MXRO. | ||||||||||||||
| on_local_replica_offsets_update(req.mtro, req.mxro); | ||||||||||||||
| record_updated_local_replica_offsets(req.mtro, req.mxro); | ||||||||||||||
|
|
||||||||||||||
| return distribute_compaction_mtro_reply{ | ||||||||||||||
| .success = distribute_compaction_mtro_reply::is_success::yes}; | ||||||||||||||
|
|
@@ -180,13 +181,17 @@ void compaction_coordinator::on_ntp_config_change() { | |||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| void compaction_coordinator::update_local_replica_offsets() { | ||||||||||||||
| on_local_replica_offsets_update( | ||||||||||||||
| bool updated = record_updated_local_replica_offsets( | ||||||||||||||
| _log->cleanly_compacted_prefix_offset(), | ||||||||||||||
| _log->transaction_free_prefix_offset()); | ||||||||||||||
| if (_leader_term_id && updated) { | ||||||||||||||
| recalculate_group_offsets(); | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| void compaction_coordinator::collect_all_replica_offsets() { | ||||||||||||||
| if (!_is_leader || _raft_as.abort_requested() || _raft_bg.is_closed()) { | ||||||||||||||
| if ( | ||||||||||||||
| !_leader_term_id || _raft_as.abort_requested() || _raft_bg.is_closed()) { | ||||||||||||||
| return; | ||||||||||||||
| } | ||||||||||||||
| update_local_replica_offsets(); | ||||||||||||||
|
|
@@ -281,7 +286,7 @@ ss::future<> compaction_coordinator::get_and_process_replica_offsets( | |||||||||||||
| = *maybe_remote_replica_offsets->mcco; | ||||||||||||||
| fs_it->second.max_transaction_free_offset | ||||||||||||||
| = *maybe_remote_replica_offsets->mxfo; | ||||||||||||||
| if (_is_leader) [[likely]] { | ||||||||||||||
| if (_leader_term_id) [[likely]] { | ||||||||||||||
| recalculate_group_offsets(); | ||||||||||||||
| } | ||||||||||||||
| } | ||||||||||||||
|
|
@@ -314,7 +319,8 @@ compaction_coordinator::get_remote_replica_offsets(vnode node_id) { | |||||||||||||
| co_return reply.value(); | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| void compaction_coordinator::on_local_replica_offsets_update( | ||||||||||||||
| // returns whether recorded offsets were changed | ||||||||||||||
| bool compaction_coordinator::record_updated_local_replica_offsets( | ||||||||||||||
| model::offset new_mcco, model::offset new_mxfo) { | ||||||||||||||
| bool mcco_updated = bump_offset_value( | ||||||||||||||
| &compaction_coordinator::_local_mcco, | ||||||||||||||
|
|
@@ -324,12 +330,14 @@ void compaction_coordinator::on_local_replica_offsets_update( | |||||||||||||
| &compaction_coordinator::_local_mxfo, | ||||||||||||||
| new_mxfo, | ||||||||||||||
| "local max transaction free offset"); | ||||||||||||||
| if (_is_leader && (mcco_updated || mxfo_updated)) { | ||||||||||||||
| recalculate_group_offsets(); | ||||||||||||||
| } | ||||||||||||||
| return mcco_updated || mxfo_updated; | ||||||||||||||
| } | ||||||||||||||
|
|
||||||||||||||
| void compaction_coordinator::send_group_offsets_to_followers() { | ||||||||||||||
| vlog( | ||||||||||||||
| _logger.debug, | ||||||||||||||
| "compaction coordinator planning to distribute group offsets in {}", | ||||||||||||||
bashtanov marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||||||||
| group_offsets_send_delay); | ||||||||||||||
|
Comment on lines
+339
to
+340
|
||||||||||||||
| "compaction coordinator planning to distribute group offsets in {}", | |
| group_offsets_send_delay); | |
| "compaction coordinator planning to distribute group offsets in {} ms", | |
| std::chrono::duration_cast<std::chrono::milliseconds>( | |
| group_offsets_send_delay) | |
| .count()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fmt prints (or rather should print) durations in human-readable form
Copilot
AI
Feb 3, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using _leader_term_id (an std::optional<model::term_id>) directly in a boolean context is incorrect. The assertion should check _leader_term_id.has_value() to properly verify whether there is a valid leader term ID.
| vassert(_leader_term_id, "only leader can recalculate group offsets"); | |
| vassert( | |
| _leader_term_id.has_value(), | |
| "only leader can recalculate group offsets"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
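No, it's the same: in a boolean context std::optional converts via its explicit operator bool, which is equivalent to has_value().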
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
How could this get called with the right-hand side of the 'and' being false?
Can this be called with mtro_updated false, mxro_updated false, and _need_force_update false?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
E.g. in the following situation: we are the leader and have been the leader in the current term for a while. The timer triggered collect_all_replica_offsets, which called update_local_replica_offsets. Local offsets have increased, so it called recalculate_group_offsets. However, the local offsets were not the lowest, as some replica is holding the compaction back, so new_mtro and new_mxro remained the same. update_group_offsets is then called with the same values as at some point before, so mtro_updated and mxro_updated are both false. _need_force_update is false as well, as it is not the first update in the current term.
Uh oh!
There was an error while loading. Please reload this page.