15
15
import logging
16
16
import random
17
17
import re
18
+ import time
18
19
19
20
from kafka .vendor import six
20
21
21
- from kafka .errors import IllegalStateError
22
+ import kafka .errors as Errors
22
23
from kafka .protocol .list_offsets import OffsetResetStrategy
23
24
from kafka .structs import OffsetAndMetadata
24
25
from kafka .util import ensure_valid_topic_name
@@ -52,10 +53,6 @@ class SubscriptionState(object):
52
53
Note that pause state as well as fetch/consumed positions are not preserved
53
54
when partition assignment is changed whether directly by the user or
54
55
through a group rebalance.
55
-
56
- This class also maintains a cache of the latest commit position for each of
57
- the assigned partitions. This is updated through committed() and can be used
58
- to set the initial fetch position (e.g. Fetcher._reset_offset() ).
59
56
"""
60
57
_SUBSCRIPTION_EXCEPTION_MESSAGE = (
61
58
"You must choose only one way to configure your consumer:"
@@ -85,18 +82,16 @@ def __init__(self, offset_reset_strategy='earliest'):
85
82
self ._group_subscription = set ()
86
83
self ._user_assignment = set ()
87
84
self .assignment = OrderedDict ()
88
- self .listener = None
89
-
90
- # initialize to true for the consumers to fetch offset upon starting up
91
- self .needs_fetch_committed_offsets = True
85
+ self .rebalance_listener = None
86
+ self .listeners = []
92
87
93
88
def _set_subscription_type(self, subscription_type):
    """Latch the subscription mode, rejecting a switch to a different mode.

    Arguments:
        subscription_type (SubscriptionType): the mode being requested.

    Raises:
        ValueError: if subscription_type is not a SubscriptionType.
        IllegalStateError: if a different, non-NONE mode is already set.
    """
    if not isinstance(subscription_type, SubscriptionType):
        raise ValueError('SubscriptionType enum required')

    current = self.subscription_type
    if current == SubscriptionType.NONE:
        # First configuration call: remember the chosen mode.
        self.subscription_type = subscription_type
        return
    if current != subscription_type:
        raise Errors.IllegalStateError(self._SUBSCRIPTION_EXCEPTION_MESSAGE)
100
95
101
96
def subscribe (self , topics = (), pattern = None , listener = None ):
102
97
"""Subscribe to a list of topics, or a topic regex pattern.
@@ -135,7 +130,7 @@ def subscribe(self, topics=(), pattern=None, listener=None):
135
130
"""
136
131
assert topics or pattern , 'Must provide topics or pattern'
137
132
if (topics and pattern ):
138
- raise IllegalStateError (self ._SUBSCRIPTION_EXCEPTION_MESSAGE )
133
+ raise Errors . IllegalStateError (self ._SUBSCRIPTION_EXCEPTION_MESSAGE )
139
134
140
135
elif pattern :
141
136
self ._set_subscription_type (SubscriptionType .AUTO_PATTERN )
@@ -150,7 +145,7 @@ def subscribe(self, topics=(), pattern=None, listener=None):
150
145
151
146
if listener and not isinstance (listener , ConsumerRebalanceListener ):
152
147
raise TypeError ('listener must be a ConsumerRebalanceListener' )
153
- self .listener = listener
148
+ self .rebalance_listener = listener
154
149
155
150
def change_subscription (self , topics ):
156
151
"""Change the topic subscription.
@@ -166,7 +161,7 @@ def change_subscription(self, topics):
166
161
- a topic name does not consist of ASCII-characters/'-'/'_'/'.'
167
162
"""
168
163
if not self .partitions_auto_assigned ():
169
- raise IllegalStateError (self ._SUBSCRIPTION_EXCEPTION_MESSAGE )
164
+ raise Errors . IllegalStateError (self ._SUBSCRIPTION_EXCEPTION_MESSAGE )
170
165
171
166
if isinstance (topics , six .string_types ):
172
167
topics = [topics ]
@@ -193,13 +188,13 @@ def group_subscribe(self, topics):
193
188
topics (list of str): topics to add to the group subscription
194
189
"""
195
190
if not self .partitions_auto_assigned ():
196
- raise IllegalStateError (self ._SUBSCRIPTION_EXCEPTION_MESSAGE )
191
+ raise Errors . IllegalStateError (self ._SUBSCRIPTION_EXCEPTION_MESSAGE )
197
192
self ._group_subscription .update (topics )
198
193
199
194
def reset_group_subscription(self):
    """Shrink the group subscription to the topics this consumer subscribes to.

    Raises:
        IllegalStateError: if partitions_auto_assigned() is false.
        AssertionError: if no subscription has been set.
    """
    auto_assigned = self.partitions_auto_assigned()
    if not auto_assigned:
        raise Errors.IllegalStateError(self._SUBSCRIPTION_EXCEPTION_MESSAGE)

    own_topics = self.subscription
    assert own_topics is not None, 'Subscription required'
    # Keep only the topics that are also in our own subscription.
    self._group_subscription.intersection_update(own_topics)
205
200
@@ -226,7 +221,6 @@ def assign_from_user(self, partitions):
226
221
self ._user_assignment = set (partitions )
227
222
self ._set_assignment ({partition : self .assignment .get (partition , TopicPartitionState ())
228
223
for partition in partitions })
229
- self .needs_fetch_committed_offsets = True
230
224
231
225
def assign_from_subscribed (self , assignments ):
232
226
"""Update the assignment to the specified partitions
@@ -241,16 +235,14 @@ def assign_from_subscribed(self, assignments):
241
235
consumer instance.
242
236
"""
243
237
if not self .partitions_auto_assigned ():
244
- raise IllegalStateError (self ._SUBSCRIPTION_EXCEPTION_MESSAGE )
238
+ raise Errors . IllegalStateError (self ._SUBSCRIPTION_EXCEPTION_MESSAGE )
245
239
246
240
for tp in assignments :
247
241
if tp .topic not in self .subscription :
248
242
raise ValueError ("Assigned partition %s for non-subscribed topic." % (tp ,))
249
243
250
- # after rebalancing, we always reinitialize the assignment value
251
244
# randomized ordering should improve balance for short-lived consumers
252
245
self ._set_assignment ({partition : TopicPartitionState () for partition in assignments }, randomize = True )
253
- self .needs_fetch_committed_offsets = True
254
246
log .info ("Updated partition assignment: %s" , assignments )
255
247
256
248
def _set_assignment (self , partition_states , randomize = False ):
@@ -300,8 +292,10 @@ def seek(self, partition, offset):
300
292
301
293
Arguments:
302
294
partition (TopicPartition): partition for seek operation
303
- offset (int): message offset in partition
295
+ offset (int or OffsetAndMetadata ): message offset in partition
304
296
"""
297
+ if not isinstance (offset , (int , OffsetAndMetadata )):
298
+ raise TypeError ("offset must be type in or OffsetAndMetadata" )
305
299
self .assignment [partition ].seek (offset )
306
300
307
301
def assigned_partitions (self ):
@@ -333,7 +327,7 @@ def all_consumed_offsets(self):
333
327
all_consumed [partition ] = state .position
334
328
return all_consumed
335
329
336
- def need_offset_reset (self , partition , offset_reset_strategy = None ):
330
+ def request_offset_reset (self , partition , offset_reset_strategy = None ):
337
331
"""Mark partition for offset reset using specified or default strategy.
338
332
339
333
Arguments:
@@ -342,7 +336,11 @@ def need_offset_reset(self, partition, offset_reset_strategy=None):
342
336
"""
343
337
if offset_reset_strategy is None :
344
338
offset_reset_strategy = self ._default_offset_reset_strategy
345
- self .assignment [partition ].await_reset (offset_reset_strategy )
339
+ self .assignment [partition ].reset (offset_reset_strategy )
340
+
341
def set_reset_pending(self, partitions, next_allowed_reset_time):
    """Forward next_allowed_reset_time to the state of each given partition.

    Arguments:
        partitions (iterable): partitions to update; each must be a key
            of self.assignment.
        next_allowed_reset_time: value handed to each partition state's
            set_reset_pending().
    """
    for tp in partitions:
        state = self.assignment[tp]
        state.set_reset_pending(next_allowed_reset_time)
346
344
347
345
def has_default_offset_reset_policy (self ):
348
346
"""Return True if default offset reset policy is Earliest or Latest"""
@@ -351,24 +349,41 @@ def has_default_offset_reset_policy(self):
351
349
def is_offset_reset_needed(self, partition):
    """Return the awaiting_reset flag of the state assigned to partition."""
    state = self.assignment[partition]
    return state.awaiting_reset
353
351
354
def has_all_fetch_positions(self):
    """Return True only if every assigned partition state has a valid position.

    An empty assignment yields True.
    """
    return all(state.has_valid_position for state in self.assignment.values())
361
357
362
358
def missing_fetch_positions(self):
    """Return the set of assigned partitions whose state is missing a position."""
    return {
        tp
        for tp, state in self.assignment.items()
        if state.is_missing_position()
    }
368
364
369
365
def has_valid_position(self, partition):
    """Return whether partition is assigned and its state has a valid position.

    Unassigned partitions yield False instead of raising.
    """
    if partition not in self.assignment:
        return False
    return self.assignment[partition].has_valid_position
371
367
368
def reset_missing_positions(self):
    """Mark every partition that is missing a position for offset reset.

    The default offset reset strategy is applied to each such partition.

    Raises:
        NoOffsetForPartitionError: if the default strategy is
            OffsetResetStrategy.NONE and at least one partition is missing
            a position; the error carries the set of those partitions.
    """
    strategy = self._default_offset_reset_strategy
    unresolvable = set()
    for tp, state in self.assignment.items():
        if not state.is_missing_position():
            continue
        if strategy == OffsetResetStrategy.NONE:
            # No automatic policy: collect and report after the loop.
            unresolvable.add(tp)
        else:
            state.reset(strategy)

    if unresolvable:
        raise Errors.NoOffsetForPartitionError(unresolvable)
379
+
380
def partitions_needing_reset(self):
    """Return the set of partitions awaiting reset whose reset is allowed now."""
    return {
        tp
        for tp, state in self.assignment.items()
        if state.awaiting_reset and state.is_reset_allowed()
    }
386
+
372
387
def is_assigned(self, partition):
    """Return True if partition is a key of the current assignment."""
    current_assignment = self.assignment
    return partition in current_assignment
374
389
@@ -384,6 +399,10 @@ def pause(self, partition):
384
399
def resume(self, partition):
    """Call resume() on the state assigned to partition."""
    state = self.assignment[partition]
    state.resume()
386
401
402
def reset_failed(self, partitions, next_retry_time):
    """Record a failed offset reset on each still-assigned partition.

    Arguments:
        partitions (iterable): partitions whose reset request failed.
        next_retry_time: value handed to each partition state's
            reset_failed().
    """
    for partition in partitions:
        # By the time the request failed the assignment may no longer
        # contain this partition; ignore it rather than raise KeyError
        # from inside the failure handler.
        state = self.assignment.get(partition)
        if state is not None:
            state.reset_failed(next_retry_time)
405
+
387
406
def move_partition_to_end (self , partition ):
388
407
if partition in self .assignment :
389
408
try :
@@ -398,14 +417,12 @@ def position(self, partition):
398
417
399
418
class TopicPartitionState (object ):
400
419
def __init__ (self ):
401
- self .committed = None # last committed OffsetAndMetadata
402
- self .has_valid_position = False # whether we have valid position
403
420
self .paused = False # whether this partition has been paused by the user
404
- self .awaiting_reset = False # whether we are awaiting reset
405
421
self .reset_strategy = None # the reset strategy if awaiting_reset is set
406
422
self ._position = None # OffsetAndMetadata exposed to the user
407
423
self .highwater = None
408
424
self .drop_pending_record_batch = False
425
+ self .next_allowed_retry_time = None
409
426
410
427
def _set_position (self , offset ):
411
428
assert self .has_valid_position , 'Valid position required'
@@ -417,18 +434,37 @@ def _get_position(self):
417
434
418
435
position = property (_get_position , _set_position , None , "last position" )
419
436
420
def reset(self, strategy):
    """Mark this partition as awaiting an offset reset with strategy.

    Clears the current position and any retry-time restriction.

    Arguments:
        strategy: the reset strategy to record; must not be None.
    """
    assert strategy is not None
    self._position = None
    self.next_allowed_retry_time = None
    self.reset_strategy = strategy
442
+
443
def is_reset_allowed(self):
    """Return True if no retry time is set or the retry time has passed.

    The retry time is compared against time.time().
    """
    retry_at = self.next_allowed_retry_time
    if retry_at is None:
        return True
    return retry_at < time.time()
445
+
446
@property
def awaiting_reset(self):
    """True while a reset strategy is recorded for this partition."""
    strategy = self.reset_strategy
    return strategy is not None
449
+
450
def set_reset_pending(self, next_allowed_retry_time):
    """Store next_allowed_retry_time, the bound checked by is_reset_allowed()."""
    retry_at = next_allowed_retry_time
    self.next_allowed_retry_time = retry_at
452
+
453
def reset_failed(self, next_allowed_retry_time):
    """Store next_allowed_retry_time after a failed reset attempt."""
    retry_at = next_allowed_retry_time
    self.next_allowed_retry_time = retry_at
455
+
456
@property
def has_valid_position(self):
    """True when a position is currently stored for this partition."""
    current = self._position
    return current is not None
459
+
460
def is_missing_position(self):
    """Return True if there is neither a valid position nor a pending reset."""
    return not (self.has_valid_position or self.awaiting_reset)
425
462
426
463
def seek(self, offset):
    """Set the position to offset and clear any pending reset.

    Arguments:
        offset (int or OffsetAndMetadata): an OffsetAndMetadata is stored
            as given; any other value is wrapped as
            OffsetAndMetadata(offset, '', -1).
    """
    if isinstance(offset, OffsetAndMetadata):
        new_position = offset
    else:
        new_position = OffsetAndMetadata(offset, '', -1)
    self._position = new_position
    # A seek supersedes any pending reset and its retry restriction.
    self.reset_strategy = None
    self.next_allowed_retry_time = None
    self.drop_pending_record_batch = True
432
468
433
469
def pause (self ):
434
470
self .paused = True
0 commit comments