Skip to content

Commit afeaa9a

Browse files
committed
Do not consume unnecessary memory during sharding
There is no need to build a temporary list whose length is the step/world size, which can be very large.
1 parent ca58154 commit afeaa9a

File tree

1 file changed

+1
-6
lines changed

1 file changed

+1
-6
lines changed

src/datasets/iterable_dataset.py

+1-6
Original file line numberDiff line numberDiff line change
@@ -556,12 +556,7 @@ def _init_state_dict(self) -> dict:
556556

557557
def __iter__(self):
558558
ex_iterator = iter(self.ex_iterable)
559-
while True:
560-
batch = list(islice(ex_iterator, self.step))
561-
if len(batch) > self.offset:
562-
yield batch[self.offset]
563-
else:
564-
break
559+
return islice(ex_iterator, self.offset, None, self.step)
565560

566561
def shuffle_data_sources(self, generator: np.random.Generator) -> "StepExamplesIterable":
567562
return StepExamplesIterable(

0 commit comments

Comments
 (0)