Skip to content

Commit 50b8c6f

Browse files
HairlessVillager and rmax authored Jul 6, 2024
fix: Scheduler not compatible with BaseDupeFilter (#294)
* fix: Scheduler not compatible with BaseDupeFilter

Co-authored-by: R Max Espinoza <hey@rmax.dev>
1 parent ea646cb commit 50b8c6f

File tree

1 file changed

+10
-1
lines changed

1 file changed

+10
-1
lines changed
 

‎src/scrapy_redis/scheduler.py

+10-1
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@ def __init__(
3737
flush_on_start=False,
3838
queue_key=defaults.SCHEDULER_QUEUE_KEY,
3939
queue_cls=defaults.SCHEDULER_QUEUE_CLASS,
40+
dupefilter=None,
4041
dupefilter_key=defaults.SCHEDULER_DUPEFILTER_KEY,
4142
dupefilter_cls=defaults.SCHEDULER_DUPEFILTER_CLASS,
4243
idle_before_close=0,
@@ -56,6 +57,8 @@ def __init__(
5657
Requests queue key.
5758
queue_cls : str
5859
Importable path to the queue class.
60+
dupefilter: Dupefilter
61+
Custom dupefilter instance.
5962
dupefilter_key : str
6063
Duplicates filter key.
6164
dupefilter_cls : str
@@ -72,6 +75,7 @@ def __init__(
7275
self.flush_on_start = flush_on_start
7376
self.queue_key = queue_key
7477
self.queue_cls = queue_cls
78+
self.df = dupefilter
7579
self.dupefilter_cls = dupefilter_cls
7680
self.dupefilter_key = dupefilter_key
7781
self.idle_before_close = idle_before_close
@@ -105,6 +109,10 @@ def from_settings(cls, settings):
105109
if val:
106110
kwargs[name] = val
107111

112+
dupefilter_cls = load_object(kwargs["dupefilter_cls"])
113+
if not hasattr(dupefilter_cls, "from_spider"):
114+
kwargs["dupefilter"] = dupefilter_cls.from_settings(settings)
115+
108116
# Support serializer as a path to a module.
109117
if isinstance(kwargs.get("serializer"), str):
110118
kwargs["serializer"] = importlib.import_module(kwargs["serializer"])
@@ -137,7 +145,8 @@ def open(self, spider):
137145
f"Failed to instantiate queue class '{self.queue_cls}': {e}"
138146
)
139147

140-
self.df = load_object(self.dupefilter_cls).from_spider(spider)
148+
if not self.df:
149+
self.df = load_object(self.dupefilter_cls).from_spider(spider)
141150

142151
if self.flush_on_start:
143152
self.flush()

0 commit comments

Comments
 (0)