From f9ad0092a5c8f036ea1a67d70a8ea38a1e2d8bd0 Mon Sep 17 00:00:00 2001 From: Michel Sabchuk Date: Tue, 3 Mar 2020 16:44:09 -0300 Subject: [PATCH 1/5] Update requirements needed to run tests. --- requirements/requirements-test.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/requirements-test.txt b/requirements/requirements-test.txt index 2addc1f..67e8d6a 100644 --- a/requirements/requirements-test.txt +++ b/requirements/requirements-test.txt @@ -4,4 +4,4 @@ pytest==3.4.0 coverage<4.4 pytest-cov==2.4.0 codeclimate-test-reporter==0.2.3 -attrs>=17.4.0 +attrs>=17.4.0,<19.0.0 From db6b06d8279f528d8b60c716dddedd8e5b088259 Mon Sep 17 00:00:00 2001 From: Michel Sabchuk Date: Tue, 3 Mar 2020 17:02:49 -0300 Subject: [PATCH 2/5] Use mock namespace to avoid pollution. --- tests/test_middlewares.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_middlewares.py b/tests/test_middlewares.py index fe365e4..73485ff 100644 --- a/tests/test_middlewares.py +++ b/tests/test_middlewares.py @@ -1,6 +1,6 @@ """This module contains the test cases for the middlewares of the ``scrapy_selenium`` package""" -from unittest.mock import patch +from unittest import mock from scrapy import Request from scrapy.crawler import Crawler @@ -64,7 +64,7 @@ def test_spider_closed_should_close_the_driver(self): selenium_middleware = SeleniumMiddleware.from_crawler(crawler) - with patch.object(selenium_middleware.driver, 'quit') as mocked_quit: + with mock.patch.object(selenium_middleware.driver, 'quit') as mocked_quit: selenium_middleware.spider_closed() mocked_quit.assert_called_once() From 475a946cfff0f298bf58099ea0742ab3f557aff6 Mon Sep 17 00:00:00 2001 From: Michel Sabchuk Date: Tue, 3 Mar 2020 17:03:48 -0300 Subject: [PATCH 3/5] Add a test that the WebDriverWait is being executed. This test was still missing. 
--- tests/test_middlewares.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tests/test_middlewares.py b/tests/test_middlewares.py index 73485ff..51a3ba9 100644 --- a/tests/test_middlewares.py +++ b/tests/test_middlewares.py @@ -135,3 +135,23 @@ def test_process_request_should_execute_script_if_script_option(self): html_response.selector.xpath('//title/text()').extract_first(), 'scrapy_selenium' ) + + @mock.patch('scrapy_selenium.middlewares.WebDriverWait') + def test_process_request_should_use_wait_time_and_wait_until_when_available(self, WebDriverWait): + """Test that the ``process_request`` should execute the WebDriverWait from selenium""" + + wait_time = 2 + wait_until = mock.Mock() # just a unique value to be checked in mock calling + selenium_request = SeleniumRequest( + url='http://www.python.org', + wait_time=wait_time, + wait_until=wait_until, + ) + + self.selenium_middleware.process_request( + request=selenium_request, + spider=None + ) + + WebDriverWait.assert_called_with(self.selenium_middleware.driver, wait_time) + WebDriverWait.return_value.until.assert_called_with(wait_until) From abe26442b31a6631c8b99b02797efe56d5828813 Mon Sep 17 00:00:00 2001 From: Michel Sabchuk Date: Tue, 3 Mar 2020 17:04:21 -0300 Subject: [PATCH 4/5] Add a test that the failed WebDriverWait won't throw an exception. 
--- tests/test_middlewares.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/test_middlewares.py b/tests/test_middlewares.py index 51a3ba9..c3cbb4c 100644 --- a/tests/test_middlewares.py +++ b/tests/test_middlewares.py @@ -7,6 +7,7 @@ from scrapy_selenium.http import SeleniumRequest from scrapy_selenium.middlewares import SeleniumMiddleware +from selenium.common.exceptions import TimeoutException from .test_cases import BaseScrapySeleniumTestCase @@ -155,3 +156,31 @@ def test_process_request_should_use_wait_time_and_wait_until_when_available(self WebDriverWait.assert_called_with(self.selenium_middleware.driver, wait_time) WebDriverWait.return_value.until.assert_called_with(wait_until) + + @mock.patch('scrapy_selenium.middlewares.WebDriverWait') + def test_process_request_should_still_return_content_if_wait_time_timeouts(self, WebDriverWait): + """Test that the ``process_request`` should still return the content when the WebDriverWait times out""" + WebDriverWait.return_value.until.side_effect = TimeoutException + + wait_time = 2 + wait_until = mock.Mock() # just a unique value to be checked in mock calling + selenium_request = SeleniumRequest( + url='http://www.python.org', + wait_time=wait_time, + wait_until=wait_until, + ) + + html_response = self.selenium_middleware.process_request( + request=selenium_request, + spider=None + ) + + # The WebDriverWait was triggered and raised the exception... + WebDriverWait.assert_called_with(self.selenium_middleware.driver, wait_time) + WebDriverWait.return_value.until.assert_called_with(wait_until) + + # But we fall into content anyway after the timeout + self.assertEqual( + html_response.selector.xpath('//title/text()').extract_first(), + 'Welcome to Python.org' + ) From 5e8745f53fddc15cd628086641e4ec4afb59e309 Mon Sep 17 00:00:00 2001 From: Michel Sabchuk Date: Tue, 3 Mar 2020 17:04:51 -0300 Subject: [PATCH 5/5] Avoid propagating the selenium TimeoutException. 
--- scrapy_selenium/middlewares.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/scrapy_selenium/middlewares.py b/scrapy_selenium/middlewares.py index 201db2c..19c8d72 100644 --- a/scrapy_selenium/middlewares.py +++ b/scrapy_selenium/middlewares.py @@ -6,6 +6,7 @@ from scrapy.exceptions import NotConfigured from scrapy.http import HtmlResponse from selenium.webdriver.support.ui import WebDriverWait +from selenium.common.exceptions import TimeoutException from .http import SeleniumRequest @@ -111,9 +112,12 @@ def process_request(self, request, spider): ) if request.wait_until: - WebDriverWait(self.driver, request.wait_time).until( - request.wait_until - ) + try: + WebDriverWait(self.driver, request.wait_time).until( + request.wait_until + ) + except TimeoutException: + pass if request.screenshot: request.meta['screenshot'] = self.driver.get_screenshot_as_png()