diff --git a/src/scrapy_redis/spiders.py b/src/scrapy_redis/spiders.py index caaa23db..902e9582 100644 --- a/src/scrapy_redis/spiders.py +++ b/src/scrapy_redis/spiders.py @@ -96,7 +96,7 @@ def setup_redis(self, crawler=None): # The idle signal is called when the spider has no requests left, # that's when we will schedule new requests from redis queue - crawler.signals.connect(self.spider_idle, signal=signals.spider_idle) + crawler.signals.connect(self.fill_requests_queue, signal=signals.request_left_downloader) def pop_list_queue(self, redis_key, batch_size): with self.server.pipeline() as pipe: @@ -112,11 +112,22 @@ def pop_priority_queue(self, redis_key, batch_size): datas, _ = pipe.execute() return datas + def fill_requests_queue(self): + need_size = self.crawler.engine.downloader.total_concurrency - \ + len(self.crawler.engine.downloader.active) - len(self.crawler.engine.slot.scheduler.queue) + if need_size > 0: + self.logger.debug("Need to fill %i request(s)", need_size) + for req in self.__next_requests(need_size): + self.crawler.engine.crawl(req, spider=self) + def next_requests(self): + return self.__next_requests(self.redis_batch_size) + + def __next_requests(self, redis_batch_size): """Returns a request to be scheduled or none.""" # XXX: Do we need to use a timeout here? found = 0 - datas = self.fetch_data(self.redis_key, self.redis_batch_size) + datas = self.fetch_data(self.redis_key, redis_batch_size) for data in datas: reqs = self.make_request_from_data(data) if isinstance(reqs, Iterable): diff --git a/tests/test_spiders.py b/tests/test_spiders.py index c8b31d64..92791fbe 100644 --- a/tests/test_spiders.py +++ b/tests/test_spiders.py @@ -70,7 +70,7 @@ def test_via_from_crawler(self, connection): myspider = MySpider.from_crawler(crawler) assert myspider.server is server connection.from_settings.assert_called_with(crawler.settings) - crawler.signals.connect.assert_called_with(myspider.spider_idle, signal=signals.spider_idle) + crawler.signals.connect.assert_called_with(myspider.fill_requests_queue, signal=signals.request_left_downloader) # Second call does nothing. server = myspider.server crawler.signals.connect.reset_mock()