From 21f50c795ac6978de9e73b64f31542a9df928ac5 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Tue, 30 Jul 2019 19:46:18 +0500 Subject: [PATCH 1/8] Add async def support to downloader middlewares. --- scrapy/core/downloader/middleware.py | 8 ++++---- tests/test_downloadermiddleware.py | 24 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/scrapy/core/downloader/middleware.py b/scrapy/core/downloader/middleware.py index 38608a429f0..9c0014206ad 100644 --- a/scrapy/core/downloader/middleware.py +++ b/scrapy/core/downloader/middleware.py @@ -8,7 +8,7 @@ from scrapy.exceptions import _InvalidOutput from scrapy.http import Request, Response from scrapy.middleware import MiddlewareManager -from scrapy.utils.defer import mustbe_deferred +from scrapy.utils.defer import mustbe_deferred, deferred_from_coro from scrapy.utils.conf import build_component_list @@ -33,7 +33,7 @@ def download(self, download_func, request, spider): @defer.inlineCallbacks def process_request(request): for method in self.methods['process_request']: - response = yield method(request=request, spider=spider) + response = yield deferred_from_coro(method(request=request, spider=spider)) if response is not None and not isinstance(response, (Response, Request)): raise _InvalidOutput('Middleware %s.process_request must return None, Response or Request, got %s' % \ (method.__self__.__class__.__name__, response.__class__.__name__)) @@ -48,7 +48,7 @@ def process_response(response): defer.returnValue(response) for method in self.methods['process_response']: - response = yield method(request=request, response=response, spider=spider) + response = yield deferred_from_coro(method(request=request, response=response, spider=spider)) if not isinstance(response, (Response, Request)): raise _InvalidOutput('Middleware %s.process_response must return Response or Request, got %s' % \ (method.__self__.__class__.__name__, type(response))) @@ -60,7 +60,7 @@ def process_response(response): def process_exception(_failure): exception = _failure.value for method in self.methods['process_exception']: - response = yield method(request=request, exception=exception, spider=spider) + response = yield deferred_from_coro(method(request=request, exception=exception, spider=spider)) if response is not None and not isinstance(response, (Response, Request)): raise _InvalidOutput('Middleware %s.process_exception must return None, Response or Request, got %s' % \ (method.__self__.__class__.__name__, type(response))) diff --git a/tests/test_downloadermiddleware.py b/tests/test_downloadermiddleware.py index 1b81ea949f1..135321d0ae4 100644 --- a/tests/test_downloadermiddleware.py +++ b/tests/test_downloadermiddleware.py @@ -1,3 +1,4 @@ +import asyncio from unittest import mock from twisted.internet.defer import Deferred @@ -206,3 +207,26 @@ def process_request(self, request, spider): self.assertIs(results[0], resp) self.assertFalse(download_func.called) + + +class MiddlewareUsingCoro(ManagerTestCase): + """Middlewares using asyncio coroutines should work""" + + def test_asyncdef(self): + resp = Response('http://example.com/index.html') + + class CoroMiddleware: + async def process_request(self, request, spider): + await asyncio.sleep(0.1) + return resp + + self.mwman._add_middleware(CoroMiddleware()) + req = Request('http://example.com/index.html') + download_func = mock.MagicMock() + dfd = self.mwman.download(download_func, req, self.spider) + results = [] + dfd.addBoth(results.append) + self._wait(dfd) + + self.assertIs(results[0], resp) + self.assertFalse(download_func.called) From 3603644552f8d15d203abca221edb56047119528 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Thu, 12 Sep 2019 20:25:29 +0500 Subject: [PATCH 2/8] Add a non-asyncio async def middleware test. --- tests/test_downloadermiddleware.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/test_downloadermiddleware.py b/tests/test_downloadermiddleware.py index 135321d0ae4..5b0cf1eb71b 100644 --- a/tests/test_downloadermiddleware.py +++ b/tests/test_downloadermiddleware.py @@ -1,6 +1,7 @@ import asyncio from unittest import mock +from twisted.internet import defer from twisted.internet.defer import Deferred from twisted.trial.unittest import TestCase from twisted.python.failure import Failure @@ -215,6 +216,25 @@ class MiddlewareUsingCoro(ManagerTestCase): def test_asyncdef(self): resp = Response('http://example.com/index.html') + class CoroMiddleware: + async def process_request(self, request, spider): + await defer.succeed(42) + return resp + + self.mwman._add_middleware(CoroMiddleware()) + req = Request('http://example.com/index.html') + download_func = mock.MagicMock() + dfd = self.mwman.download(download_func, req, self.spider) + results = [] + dfd.addBoth(results.append) + self._wait(dfd) + + self.assertIs(results[0], resp) + self.assertFalse(download_func.called) + + def test_asyncdef_asyncio(self): + resp = Response('http://example.com/index.html') + class CoroMiddleware: async def process_request(self, request, spider): await asyncio.sleep(0.1) From 5cf1ac0005fa3a174b700d88c0e2536b689f13c7 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Mon, 16 Dec 2019 19:24:44 +0500 Subject: [PATCH 3/8] Move the asyncio downloader mw test to a separate class. --- tests/test_downloadermiddleware.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/test_downloadermiddleware.py b/tests/test_downloadermiddleware.py index 5b0cf1eb71b..c5c4d13bd29 100644 --- a/tests/test_downloadermiddleware.py +++ b/tests/test_downloadermiddleware.py @@ -1,6 +1,7 @@ import asyncio from unittest import mock +from pytest import mark from twisted.internet import defer from twisted.internet.defer import Deferred from twisted.trial.unittest import TestCase @@ -232,6 +233,12 @@ async def process_request(self, request, spider): self.assertIs(results[0], resp) self.assertFalse(download_func.called) + +@mark.only_asyncio() +class MiddlewareUsingCoroAsyncio(ManagerTestCase): + + settings_dict = {'ASYNCIO_ENABLED': True} + def test_asyncdef_asyncio(self): resp = Response('http://example.com/index.html') From 50aa6ef22cc9dec3655c9e3aaee225f159e94df1 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Sat, 21 Dec 2019 14:36:11 +0500 Subject: [PATCH 4/8] Add deferred_from_coro. --- scrapy/utils/defer.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/scrapy/utils/defer.py b/scrapy/utils/defer.py index 20ce5929772..bbd5ebe529f 100644 --- a/scrapy/utils/defer.py +++ b/scrapy/utils/defer.py @@ -1,10 +1,14 @@ """ Helper functions for dealing with Twisted deferreds """ +import asyncio +import inspect + from twisted.internet import defer, task from twisted.python import failure from scrapy.exceptions import IgnoreRequest +from scrapy.utils.asyncio import is_asyncio_reactor_installed def defer_fail(_failure): @@ -114,3 +118,25 @@ def iter_errback(iterable, errback, *a, **kw): break except Exception: errback(failure.Failure(), *a, **kw) + + +def _isfuture(o): + # workaround for Python before 3.5.3 not having asyncio.isfuture + if hasattr(asyncio, 'isfuture'): + return asyncio.isfuture(o) + return isinstance(o, asyncio.Future) + + +def deferred_from_coro(o): + """Converts a coroutine into a Deferred, or returns the object as is if it isn't a coroutine""" + if isinstance(o, defer.Deferred): + return o + if _isfuture(o) or inspect.isawaitable(o): + if not is_asyncio_reactor_installed(): + # wrapping the coroutine directly into a Deferred, this doesn't work correctly with coroutines + # that use asyncio, e.g. "await asyncio.sleep(1)" + return defer.ensureDeferred(o) + else: + # wrapping the coroutine into a Future and then into a Deferred, this requires AsyncioSelectorReactor + return defer.Deferred.fromFuture(asyncio.ensure_future(o)) + return o From 16787f5bf4475fe1604c1e4cac7b491f1df6a1fb Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Mon, 30 Dec 2019 12:02:19 +0500 Subject: [PATCH 5/8] Merge middleware tests back as we don't need to set the setting anymore. --- tests/test_downloadermiddleware.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/tests/test_downloadermiddleware.py b/tests/test_downloadermiddleware.py index c5c4d13bd29..3943cecf75b 100644 --- a/tests/test_downloadermiddleware.py +++ b/tests/test_downloadermiddleware.py @@ -233,12 +233,7 @@ async def process_request(self, request, spider): self.assertIs(results[0], resp) self.assertFalse(download_func.called) - -@mark.only_asyncio() -class MiddlewareUsingCoroAsyncio(ManagerTestCase): - - settings_dict = {'ASYNCIO_ENABLED': True} - + @mark.only_asyncio() def test_asyncdef_asyncio(self): resp = Response('http://example.com/index.html') From e3b8ba6188ce703e43563d8588b7d709b037a0e0 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Tue, 31 Dec 2019 17:54:01 +0500 Subject: [PATCH 6/8] Run py35-asyncio also on 3.5.2 to test Xenial. --- .travis.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.travis.yml b/.travis.yml index 82167e10a74..c808b3436b4 100644 --- a/.travis.yml +++ b/.travis.yml @@ -18,6 +18,8 @@ matrix: python: 3.5 - env: TOXENV=py35-asyncio python: 3.5 + - env: TOXENV=py35-asyncio + python: 3.5.2 - env: TOXENV=py36 python: 3.6 - env: TOXENV=py37 From 2b9254c2bde76995e81c54ad112ae884a1499386 Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Tue, 31 Dec 2019 17:54:41 +0500 Subject: [PATCH 7/8] Add a test function that uses asyncio.Queue(). --- scrapy/utils/test.py | 9 ++++++++- tests/test_downloadermiddleware.py | 5 +++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/scrapy/utils/test.py b/scrapy/utils/test.py index 307c253520b..0f4cf80914e 100644 --- a/scrapy/utils/test.py +++ b/scrapy/utils/test.py @@ -1,7 +1,7 @@ """ This module contains some assorted functions used in tests """ - +import asyncio import os from importlib import import_module @@ -96,3 +96,10 @@ def assert_samelines(testcase, text1, text2, msg=None): line endings between platforms """ testcase.assertEqual(text1.splitlines(), text2.splitlines(), msg) + + +def get_from_asyncio_queue(value): + q = asyncio.Queue() + getter = q.get() + q.put_nowait(value) + return getter diff --git a/tests/test_downloadermiddleware.py b/tests/test_downloadermiddleware.py index 3943cecf75b..3dd4f2351a2 100644 --- a/tests/test_downloadermiddleware.py +++ b/tests/test_downloadermiddleware.py @@ -11,7 +11,7 @@ from scrapy.spiders import Spider from scrapy.exceptions import _InvalidOutput from scrapy.core.downloader.middleware import DownloaderMiddlewareManager -from scrapy.utils.test import get_crawler +from scrapy.utils.test import get_crawler, get_from_asyncio_queue from scrapy.utils.python import to_bytes @@ -240,7 +240,8 @@ def test_asyncdef_asyncio(self): class CoroMiddleware: async def process_request(self, request, spider): await asyncio.sleep(0.1) - return resp + result = await get_from_asyncio_queue(resp) + return result self.mwman._add_middleware(CoroMiddleware()) req = Request('http://example.com/index.html') From b2dd379bc2b93b4863dd36a296780481d758c4cb Mon Sep 17 00:00:00 2001 From: Andrey Rakhmatullin Date: Fri, 3 Jan 2020 21:38:05 +0500 Subject: [PATCH 8/8] Remove the py35-asyncio env for 3.5 from Travis. --- .travis.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index c808b3436b4..66e1a9617d8 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,8 +16,6 @@ matrix: python: 3.5 - env: TOXENV=pinned python: 3.5 - - env: TOXENV=py35-asyncio - python: 3.5 - env: TOXENV=py35-asyncio python: 3.5.2 - env: TOXENV=py36