@@ -734,7 +734,13 @@ def _poll_once(self, timeout_ms, max_records, update_offsets=True):
         if records:
             return records
 
-        self._client.poll(timeout_ms=inner_timeout_ms(self._coordinator.time_to_next_poll() * 1000))
+        # We do not want to be stuck blocking in poll if we are missing some positions
+        # since the offset lookup may be backing off after a failure
+        poll_timeout_ms = inner_timeout_ms(self._coordinator.time_to_next_poll() * 1000)
+        if not has_all_fetch_positions:
+            poll_timeout_ms = min(poll_timeout_ms, self.config['retry_backoff_ms'])
+
+        self._client.poll(timeout_ms=poll_timeout_ms)
         # after the long poll, we should check whether the group needs to rebalance
         # prior to returning data so that the group can stabilize faster
         if self._coordinator.need_rejoin():
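The net effect of this hunk: when some assigned partitions still lack a fetch position, the long poll is capped at retry_backoff_ms so the offset lookup can be re-attempted promptly instead of blocking until the coordinator's next scheduled activity. A minimal, runnable sketch of that capping logic follows; compute_poll_timeout_ms and RETRY_BACKOFF_MS are hypothetical names for illustration, not kafka-python's API:

    RETRY_BACKOFF_MS = 100  # stand-in for self.config['retry_backoff_ms']

    def compute_poll_timeout_ms(remaining_ms, time_to_next_poll_ms, has_all_fetch_positions):
        # Bound the long poll by the caller's remaining time budget and by the
        # coordinator's next required activity (heartbeat / autocommit).
        poll_timeout_ms = min(remaining_ms, time_to_next_poll_ms)
        if not has_all_fetch_positions:
            # Some partitions lack a position: wake up after the retry backoff
            # so the offset lookup can be retried instead of sleeping through it.
            poll_timeout_ms = min(poll_timeout_ms, RETRY_BACKOFF_MS)
        return poll_timeout_ms

    assert compute_poll_timeout_ms(5000, 3000, has_all_fetch_positions=True) == 3000
    assert compute_poll_timeout_ms(5000, 3000, has_all_fetch_positions=False) == 100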
@@ -1142,29 +1148,27 @@ def _update_fetch_positions(self, timeout_ms=None):
         if self._subscription.has_all_fetch_positions():
             return True
 
-        inner_timeout_ms = timeout_ms_fn(timeout_ms, 'Timeout updating fetch positions')
-        try:
-            if (self.config['api_version'] >= (0, 8, 1) and
-                self.config['group_id'] is not None):
+        if (self.config['api_version'] >= (0, 8, 1) and
+            self.config['group_id'] is not None):
+            try:
                 # If there are any partitions which do not have a valid position and are not
                 # awaiting reset, then we need to fetch committed offsets. We will only do a
                 # coordinator lookup if there are partitions which have missing positions, so
                 # a consumer with manually assigned partitions can avoid a coordinator dependence
                 # by always ensuring that assigned partitions have an initial position.
-                self._coordinator.refresh_committed_offsets_if_needed(timeout_ms=inner_timeout_ms())
-
-            # If there are partitions still needing a position and a reset policy is defined,
-            # request reset using the default policy. If no reset strategy is defined and there
-            # are partitions with a missing position, then we will raise an exception.
-            self._subscription.reset_missing_positions()
-
-            # Finally send an asynchronous request to lookup and update the positions of any
-            # partitions which are awaiting reset.
-            self._fetcher.reset_offsets_if_needed()
-            return True
-
-        except KafkaTimeoutError:
-            return False
+                self._coordinator.refresh_committed_offsets_if_needed(timeout_ms=timeout_ms)
+            except KafkaTimeoutError:
+                pass
+
+        # If there are partitions still needing a position and a reset policy is defined,
+        # request reset using the default policy. If no reset strategy is defined and there
+        # are partitions with a missing position, then we will raise an exception.
+        self._subscription.reset_missing_positions()
+
+        # Finally send an asynchronous request to lookup and update the positions of any
+        # partitions which are awaiting reset.
+        self._fetcher.reset_offsets_if_needed()
+        return False
 
     def _message_generator_v2(self):
         timeout_ms = 1000 * max(0, self._consumer_timeout - time.time())
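The key change in this hunk is the error handling: the try/except now wraps only the committed-offset refresh, so a KafkaTimeoutError there (e.g. a slow or backing-off coordinator lookup) no longer aborts the whole update, and resetting missing positions still proceeds. A minimal sketch of the reordered control flow, with assumed, simplified names standing in for the real KafkaConsumer internals:

    class KafkaTimeoutError(Exception):
        # local stand-in for kafka.errors.KafkaTimeoutError
        pass

    def update_fetch_positions(has_all_positions, refresh_committed, reset_missing):
        if has_all_positions():
            return True
        try:
            refresh_committed()  # may time out while the coordinator is unavailable
        except KafkaTimeoutError:
            pass                 # fall through: the reset below can still make progress
        reset_missing()          # raises only if no reset policy is defined
        return False             # positions are looked up asynchronously

    def flaky_refresh():
        raise KafkaTimeoutError('coordinator lookup timed out')

    # Even when the committed-offset lookup times out, reset still runs:
    assert update_fetch_positions(lambda: False, flaky_refresh, lambda: None) is False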