
Commit ca1672b

Revert "KAFKA-18404: Remove partitionMaxBytes usage from DelayedShareFetch (#17870)" (#18643)
This reverts commit b021b51. Reviewers: Ismael Juma <[email protected]>, Andrew Schofield <[email protected]>
1 parent: 9e80d7e · commit: ca1672b
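For context on what this revert restores: the reverted change (#17870) had DelayedShareFetch derive per-partition byte limits from the request-level maxBytes via PartitionMaxBytesStrategy, whereas this revert brings back the per-partition maxBytes carried in the share fetch request itself. The sketch below contrasts the two computations; it is illustrative only, assumes the UNIFORM strategy splits the request budget evenly across acquirable partitions (as its name suggests), and uses hypothetical helper names rather than the real Kafka API.

import java.util.LinkedHashMap;
import java.util.Map;

// Illustrative sketch only; not the Kafka implementation.
public class MaxBytesSketch {

    // Reverted approach (#17870): a limit computed from the request-level maxBytes, standing in
    // for partitionMaxBytesStrategy.maxBytes(requestMaxBytes, partitions, acquiredCount), under
    // the assumption that the UNIFORM strategy divides the budget evenly.
    static int uniformLimit(int requestMaxBytes, int acquiredPartitionCount) {
        return requestMaxBytes / Math.max(1, acquiredPartitionCount);
    }

    // Restored approach (this revert): each partition keeps the maxBytes supplied in the
    // share fetch request, mirroring shareFetch.partitionMaxBytes().getOrDefault(tp, 0).
    static int perPartitionLimit(Map<String, Integer> partitionMaxBytes, String topicPartition) {
        return partitionMaxBytes.getOrDefault(topicPartition, 0);
    }

    public static void main(String[] args) {
        Map<String, Integer> partitionMaxBytes = new LinkedHashMap<>();
        partitionMaxBytes.put("topicA-0", 1_048_576);
        partitionMaxBytes.put("topicA-1", 524_288);

        // With a 2 MiB request budget and two acquired partitions, a uniform split gives
        // each partition 1 MiB regardless of what the request asked for per partition.
        System.out.println(uniformLimit(2_097_152, partitionMaxBytes.size())); // 1048576
        // The restored code honours the per-partition value from the request instead.
        System.out.println(perPartitionLimit(partitionMaxBytes, "topicA-1"));  // 524288
    }
}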


5 files changed, +55 / -429 lines changed


core/src/main/java/kafka/server/share/DelayedShareFetch.java (+42 / -66)
@@ -27,7 +27,6 @@
 import org.apache.kafka.server.purgatory.DelayedOperation;
 import org.apache.kafka.server.share.SharePartitionKey;
 import org.apache.kafka.server.share.fetch.DelayedShareFetchGroupKey;
-import org.apache.kafka.server.share.fetch.PartitionMaxBytesStrategy;
 import org.apache.kafka.server.share.fetch.ShareFetch;
 import org.apache.kafka.server.storage.log.FetchIsolation;
 import org.apache.kafka.server.storage.log.FetchPartitionData;
@@ -61,35 +60,24 @@ public class DelayedShareFetch extends DelayedOperation {
     private final ShareFetch shareFetch;
     private final ReplicaManager replicaManager;
     private final BiConsumer<SharePartitionKey, Throwable> exceptionHandler;
-    private final PartitionMaxBytesStrategy partitionMaxBytesStrategy;
     // The topic partitions that need to be completed for the share fetch request are given by sharePartitions.
     // sharePartitions is a subset of shareFetchData. The order of insertion/deletion of entries in sharePartitions is important.
     private final LinkedHashMap<TopicIdPartition, SharePartition> sharePartitions;
-    private LinkedHashMap<TopicIdPartition, Long> partitionsAcquired;
+    private LinkedHashMap<TopicIdPartition, FetchRequest.PartitionData> partitionsAcquired;
     private LinkedHashMap<TopicIdPartition, LogReadResult> partitionsAlreadyFetched;

     DelayedShareFetch(
             ShareFetch shareFetch,
             ReplicaManager replicaManager,
             BiConsumer<SharePartitionKey, Throwable> exceptionHandler,
             LinkedHashMap<TopicIdPartition, SharePartition> sharePartitions) {
-        this(shareFetch, replicaManager, exceptionHandler, sharePartitions, PartitionMaxBytesStrategy.type(PartitionMaxBytesStrategy.StrategyType.UNIFORM));
-    }
-
-    DelayedShareFetch(
-            ShareFetch shareFetch,
-            ReplicaManager replicaManager,
-            BiConsumer<SharePartitionKey, Throwable> exceptionHandler,
-            LinkedHashMap<TopicIdPartition, SharePartition> sharePartitions,
-            PartitionMaxBytesStrategy partitionMaxBytesStrategy) {
         super(shareFetch.fetchParams().maxWaitMs, Optional.empty());
         this.shareFetch = shareFetch;
         this.replicaManager = replicaManager;
         this.partitionsAcquired = new LinkedHashMap<>();
         this.partitionsAlreadyFetched = new LinkedHashMap<>();
         this.exceptionHandler = exceptionHandler;
         this.sharePartitions = sharePartitions;
-        this.partitionMaxBytesStrategy = partitionMaxBytesStrategy;
     }

     @Override
@@ -111,7 +99,7 @@ public void onComplete() {
             partitionsAcquired.keySet());

         try {
-            LinkedHashMap<TopicIdPartition, Long> topicPartitionData;
+            LinkedHashMap<TopicIdPartition, FetchRequest.PartitionData> topicPartitionData;
             // tryComplete did not invoke forceComplete, so we need to check if we have any partitions to fetch.
             if (partitionsAcquired.isEmpty())
                 topicPartitionData = acquirablePartitions();
@@ -133,13 +121,11 @@ public void onComplete() {
         }
     }

-    private void completeShareFetchRequest(LinkedHashMap<TopicIdPartition, Long> topicPartitionData) {
+    private void completeShareFetchRequest(LinkedHashMap<TopicIdPartition, FetchRequest.PartitionData> topicPartitionData) {
         try {
             LinkedHashMap<TopicIdPartition, LogReadResult> responseData;
             if (partitionsAlreadyFetched.isEmpty())
-                responseData = readFromLog(
-                    topicPartitionData,
-                    partitionMaxBytesStrategy.maxBytes(shareFetch.fetchParams().maxBytes, topicPartitionData.keySet(), topicPartitionData.size()));
+                responseData = readFromLog(topicPartitionData);
             else
                 // There shouldn't be a case when we have a partitionsAlreadyFetched value here and this variable is getting
                 // updated in a different tryComplete thread.
@@ -172,7 +158,7 @@ private void completeShareFetchRequest(LinkedHashMap<TopicIdPartition, Long> top
      */
     @Override
     public boolean tryComplete() {
-        LinkedHashMap<TopicIdPartition, Long> topicPartitionData = acquirablePartitions();
+        LinkedHashMap<TopicIdPartition, FetchRequest.PartitionData> topicPartitionData = acquirablePartitions();

         try {
             if (!topicPartitionData.isEmpty()) {
@@ -181,7 +167,7 @@ public boolean tryComplete() {
                 // those topic partitions.
                 LinkedHashMap<TopicIdPartition, LogReadResult> replicaManagerReadResponse = maybeReadFromLog(topicPartitionData);
                 maybeUpdateFetchOffsetMetadata(topicPartitionData, replicaManagerReadResponse);
-                if (anyPartitionHasLogReadError(replicaManagerReadResponse) || isMinBytesSatisfied(topicPartitionData, partitionMaxBytesStrategy.maxBytes(shareFetch.fetchParams().maxBytes, topicPartitionData.keySet(), topicPartitionData.size()))) {
+                if (anyPartitionHasLogReadError(replicaManagerReadResponse) || isMinBytesSatisfied(topicPartitionData)) {
                     partitionsAcquired = topicPartitionData;
                     partitionsAlreadyFetched = replicaManagerReadResponse;
                     boolean completedByMe = forceComplete();
@@ -216,18 +202,28 @@ public boolean tryComplete() {
      * Prepare fetch request structure for partitions in the share fetch request for which we can acquire records.
      */
     // Visible for testing
-    LinkedHashMap<TopicIdPartition, Long> acquirablePartitions() {
+    LinkedHashMap<TopicIdPartition, FetchRequest.PartitionData> acquirablePartitions() {
         // Initialize the topic partitions for which the fetch should be attempted.
-        LinkedHashMap<TopicIdPartition, Long> topicPartitionData = new LinkedHashMap<>();
+        LinkedHashMap<TopicIdPartition, FetchRequest.PartitionData> topicPartitionData = new LinkedHashMap<>();

         sharePartitions.forEach((topicIdPartition, sharePartition) -> {
+            int partitionMaxBytes = shareFetch.partitionMaxBytes().getOrDefault(topicIdPartition, 0);
             // Add the share partition to the list of partitions to be fetched only if we can
             // acquire the fetch lock on it.
             if (sharePartition.maybeAcquireFetchLock()) {
                 try {
                     // If the share partition is already at capacity, we should not attempt to fetch.
                     if (sharePartition.canAcquireRecords()) {
-                        topicPartitionData.put(topicIdPartition, sharePartition.nextFetchOffset());
+                        topicPartitionData.put(
+                            topicIdPartition,
+                            new FetchRequest.PartitionData(
+                                topicIdPartition.topicId(),
+                                sharePartition.nextFetchOffset(),
+                                0,
+                                partitionMaxBytes,
+                                Optional.empty()
+                            )
+                        );
                     } else {
                         sharePartition.releaseFetchLock();
                         log.trace("Record lock partition limit exceeded for SharePartition {}, " +
@@ -243,28 +239,24 @@ LinkedHashMap<TopicIdPartition, Long> acquirablePartitions() {
         return topicPartitionData;
     }

-    private LinkedHashMap<TopicIdPartition, LogReadResult> maybeReadFromLog(LinkedHashMap<TopicIdPartition, Long> topicPartitionData) {
-        LinkedHashMap<TopicIdPartition, Long> partitionsNotMatchingFetchOffsetMetadata = new LinkedHashMap<>();
-        topicPartitionData.forEach((topicIdPartition, fetchOffset) -> {
+    private LinkedHashMap<TopicIdPartition, LogReadResult> maybeReadFromLog(LinkedHashMap<TopicIdPartition, FetchRequest.PartitionData> topicPartitionData) {
+        LinkedHashMap<TopicIdPartition, FetchRequest.PartitionData> partitionsNotMatchingFetchOffsetMetadata = new LinkedHashMap<>();
+        topicPartitionData.forEach((topicIdPartition, partitionData) -> {
             SharePartition sharePartition = sharePartitions.get(topicIdPartition);
-            if (sharePartition.fetchOffsetMetadata(fetchOffset).isEmpty()) {
-                partitionsNotMatchingFetchOffsetMetadata.put(topicIdPartition, fetchOffset);
+            if (sharePartition.fetchOffsetMetadata(partitionData.fetchOffset).isEmpty()) {
+                partitionsNotMatchingFetchOffsetMetadata.put(topicIdPartition, partitionData);
             }
         });
         if (partitionsNotMatchingFetchOffsetMetadata.isEmpty()) {
             return new LinkedHashMap<>();
         }
         // We fetch data from replica manager corresponding to the topic partitions that have missing fetch offset metadata.
-        // Although we are fetching partition max bytes for partitionsNotMatchingFetchOffsetMetadata,
-        // we will take acquired partitions size = topicPartitionData.size() because we do not want to let the
-        // leftover partitions to starve which will be fetched later.
-        return readFromLog(
-            partitionsNotMatchingFetchOffsetMetadata,
-            partitionMaxBytesStrategy.maxBytes(shareFetch.fetchParams().maxBytes, partitionsNotMatchingFetchOffsetMetadata.keySet(), topicPartitionData.size()));
+        return readFromLog(partitionsNotMatchingFetchOffsetMetadata);
     }

-    private void maybeUpdateFetchOffsetMetadata(LinkedHashMap<TopicIdPartition, Long> topicPartitionData,
-                                                LinkedHashMap<TopicIdPartition, LogReadResult> replicaManagerReadResponseData) {
+    private void maybeUpdateFetchOffsetMetadata(
+        LinkedHashMap<TopicIdPartition, FetchRequest.PartitionData> topicPartitionData,
+        LinkedHashMap<TopicIdPartition, LogReadResult> replicaManagerReadResponseData) {
         for (Map.Entry<TopicIdPartition, LogReadResult> entry : replicaManagerReadResponseData.entrySet()) {
             TopicIdPartition topicIdPartition = entry.getKey();
             SharePartition sharePartition = sharePartitions.get(topicIdPartition);
@@ -275,18 +267,17 @@ private void maybeUpdateFetchOffsetMetadata(LinkedHashMap<TopicIdPartition, Long
                 continue;
             }
             sharePartition.updateFetchOffsetMetadata(
-                topicPartitionData.get(topicIdPartition),
+                topicPartitionData.get(topicIdPartition).fetchOffset,
                 replicaManagerLogReadResult.info().fetchOffsetMetadata);
         }
     }

     // minByes estimation currently assumes the common case where all fetched data is acquirable.
-    private boolean isMinBytesSatisfied(LinkedHashMap<TopicIdPartition, Long> topicPartitionData,
-                                        LinkedHashMap<TopicIdPartition, Integer> partitionMaxBytes) {
+    private boolean isMinBytesSatisfied(LinkedHashMap<TopicIdPartition, FetchRequest.PartitionData> topicPartitionData) {
         long accumulatedSize = 0;
-        for (Map.Entry<TopicIdPartition, Long> entry : topicPartitionData.entrySet()) {
+        for (Map.Entry<TopicIdPartition, FetchRequest.PartitionData> entry : topicPartitionData.entrySet()) {
             TopicIdPartition topicIdPartition = entry.getKey();
-            long fetchOffset = entry.getValue();
+            FetchRequest.PartitionData partitionData = entry.getValue();

             LogOffsetMetadata endOffsetMetadata;
             try {
@@ -303,7 +294,7 @@ private boolean isMinBytesSatisfied(LinkedHashMap<TopicIdPartition, Long> topicP

             SharePartition sharePartition = sharePartitions.get(topicIdPartition);

-            Optional<LogOffsetMetadata> optionalFetchOffsetMetadata = sharePartition.fetchOffsetMetadata(fetchOffset);
+            Optional<LogOffsetMetadata> optionalFetchOffsetMetadata = sharePartition.fetchOffsetMetadata(partitionData.fetchOffset);
             if (optionalFetchOffsetMetadata.isEmpty() || optionalFetchOffsetMetadata.get() == LogOffsetMetadata.UNKNOWN_OFFSET_METADATA)
                 continue;
             LogOffsetMetadata fetchOffsetMetadata = optionalFetchOffsetMetadata.get();
@@ -321,7 +312,7 @@ private boolean isMinBytesSatisfied(LinkedHashMap<TopicIdPartition, Long> topicP
                 return true;
             } else if (fetchOffsetMetadata.onSameSegment(endOffsetMetadata)) {
                 // we take the partition fetch size as upper bound when accumulating the bytes.
-                long bytesAvailable = Math.min(endOffsetMetadata.positionDiff(fetchOffsetMetadata), partitionMaxBytes.get(topicIdPartition));
+                long bytesAvailable = Math.min(endOffsetMetadata.positionDiff(fetchOffsetMetadata), partitionData.maxBytes);
                 accumulatedSize += bytesAvailable;
             }
         }
@@ -344,25 +335,13 @@ else if (isolationType == FetchIsolation.HIGH_WATERMARK)

     }

-    private LinkedHashMap<TopicIdPartition, LogReadResult> readFromLog(LinkedHashMap<TopicIdPartition, Long> topicPartitionFetchOffsets,
-                                                                       LinkedHashMap<TopicIdPartition, Integer> partitionMaxBytes) {
+    private LinkedHashMap<TopicIdPartition, LogReadResult> readFromLog(LinkedHashMap<TopicIdPartition, FetchRequest.PartitionData> topicPartitionData) {
         // Filter if there already exists any erroneous topic partition.
-        Set<TopicIdPartition> partitionsToFetch = shareFetch.filterErroneousTopicPartitions(topicPartitionFetchOffsets.keySet());
+        Set<TopicIdPartition> partitionsToFetch = shareFetch.filterErroneousTopicPartitions(topicPartitionData.keySet());
         if (partitionsToFetch.isEmpty()) {
             return new LinkedHashMap<>();
         }

-        LinkedHashMap<TopicIdPartition, FetchRequest.PartitionData> topicPartitionData = new LinkedHashMap<>();
-
-        topicPartitionFetchOffsets.forEach((topicIdPartition, fetchOffset) -> topicPartitionData.put(topicIdPartition,
-            new FetchRequest.PartitionData(
-                topicIdPartition.topicId(),
-                fetchOffset,
-                0,
-                partitionMaxBytes.get(topicIdPartition),
-                Optional.empty())
-        ));
-
         Seq<Tuple2<TopicIdPartition, LogReadResult>> responseLogResult = replicaManager.readFromLog(
             shareFetch.fetchParams(),
             CollectionConverters.asScala(
@@ -411,21 +390,18 @@ private void handleFetchException(
     }

     // Visible for testing.
-    LinkedHashMap<TopicIdPartition, LogReadResult> combineLogReadResponse(LinkedHashMap<TopicIdPartition, Long> topicPartitionData,
-                                                                          LinkedHashMap<TopicIdPartition, LogReadResult> existingFetchedData) {
-        LinkedHashMap<TopicIdPartition, Long> missingLogReadTopicPartitions = new LinkedHashMap<>();
-        topicPartitionData.forEach((topicIdPartition, fetchOffset) -> {
+    LinkedHashMap<TopicIdPartition, LogReadResult> combineLogReadResponse(LinkedHashMap<TopicIdPartition, FetchRequest.PartitionData> topicPartitionData,
+                                                                          LinkedHashMap<TopicIdPartition, LogReadResult> existingFetchedData) {
+        LinkedHashMap<TopicIdPartition, FetchRequest.PartitionData> missingLogReadTopicPartitions = new LinkedHashMap<>();
+        topicPartitionData.forEach((topicIdPartition, partitionData) -> {
             if (!existingFetchedData.containsKey(topicIdPartition)) {
-                missingLogReadTopicPartitions.put(topicIdPartition, fetchOffset);
+                missingLogReadTopicPartitions.put(topicIdPartition, partitionData);
             }
         });
         if (missingLogReadTopicPartitions.isEmpty()) {
             return existingFetchedData;
         }
-
-        LinkedHashMap<TopicIdPartition, LogReadResult> missingTopicPartitionsLogReadResponse = readFromLog(
-            missingLogReadTopicPartitions,
-            partitionMaxBytesStrategy.maxBytes(shareFetch.fetchParams().maxBytes, missingLogReadTopicPartitions.keySet(), topicPartitionData.size()));
+        LinkedHashMap<TopicIdPartition, LogReadResult> missingTopicPartitionsLogReadResponse = readFromLog(missingLogReadTopicPartitions);
         missingTopicPartitionsLogReadResponse.putAll(existingFetchedData);
         return missingTopicPartitionsLogReadResponse;
     }
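As a small, self-contained illustration of the restored data shape (the classes and fields come from the diff above; the topic, offset, and byte values are made up), acquirablePartitions() now keys its result by FetchRequest.PartitionData, which carries both the next fetch offset and the per-partition max bytes that maybeReadFromLog(), isMinBytesSatisfied(), and readFromLog() consume:

import java.util.LinkedHashMap;
import java.util.Optional;

import org.apache.kafka.common.TopicIdPartition;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.Uuid;
import org.apache.kafka.common.requests.FetchRequest;

public class PartitionDataShapeSketch {
    public static void main(String[] args) {
        Uuid topicId = Uuid.randomUuid();
        TopicIdPartition tp = new TopicIdPartition(topicId, new TopicPartition("topicA", 0));

        // Shape built by the restored acquirablePartitions(): fetch offset and per-partition
        // max bytes travel together instead of living in separate maps.
        LinkedHashMap<TopicIdPartition, FetchRequest.PartitionData> topicPartitionData = new LinkedHashMap<>();
        topicPartitionData.put(tp, new FetchRequest.PartitionData(
            topicId,
            42L,              // nextFetchOffset() of the share partition (made-up value)
            0,                // log start offset
            1_048_576,        // per-partition max bytes from the share fetch request (made-up value)
            Optional.empty()  // current leader epoch
        ));

        FetchRequest.PartitionData partitionData = topicPartitionData.get(tp);
        // The same public fields the restored code reads in maybeReadFromLog() and isMinBytesSatisfied().
        System.out.println(partitionData.fetchOffset + " / " + partitionData.maxBytes);
    }
}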
