From 560ec91f608c7db9c3f328c8fcaeddab5d6715af Mon Sep 17 00:00:00 2001 From: 111qqz <30498101+111qqz@users.noreply.github.com> Date: Thu, 25 Oct 2018 13:41:07 -0400 Subject: [PATCH 1/3] add python version requirement (#16) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1bdc193..c46857a 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Scrapy middleware to handle javascript pages using selenium. ``` $ pip install scrapy-selenium ``` - +You should use **python>=3.6**. You will also need one of the Selenium [compatible browsers](http://www.seleniumhq.org/about/platforms.jsp). ## Configuration From cac4f0f7e8886244f5b487e85772f60898f90f39 Mon Sep 17 00:00:00 2001 From: Giles Richard Greenway Date: Thu, 3 Jan 2019 20:38:27 +0000 Subject: [PATCH 2/3] Add SELENIUM_BROWSER_EXECUTABLE_PATH setting Add the SELENIUM_BROWSER_EXECUTABLE_PATH setting to provide the path to the browser binary --- README.md | 11 ++++++++--- scrapy_selenium/middlewares.py | 12 +++++++++--- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index c46857a..0089ef5 100644 --- a/README.md +++ b/README.md @@ -11,15 +11,20 @@ You should use **python>=3.6**. You will also need one of the Selenium [compatible browsers](http://www.seleniumhq.org/about/platforms.jsp). ## Configuration -1. Add the browser to use, the path to the executable, and the arguments to pass to the executable to the scrapy settings: +1. 
Add the browser to use, the path to the driver executable, and the arguments to pass to the executable to the scrapy settings: ```python from shutil import which - SELENIUM_DRIVER_NAME='firefox' - SELENIUM_DRIVER_EXECUTABLE_PATH=which('geckodriver') + SELENIUM_DRIVER_NAME = 'firefox' + SELENIUM_DRIVER_EXECUTABLE_PATH = which('geckodriver') SELENIUM_DRIVER_ARGUMENTS=['-headless'] # '--headless' if using chrome instead of firefox ``` +Optionally, set the path to the browser executable: + ```python + SELENIUM_BROWSER_EXECUTABLE_PATH = which('firefox') + ``` + 2. Add the `SeleniumMiddleware` to the downloader middlewares: ```python DOWNLOADER_MIDDLEWARES = { diff --git a/scrapy_selenium/middlewares.py b/scrapy_selenium/middlewares.py index 862f6e1..1a4d8a8 100644 --- a/scrapy_selenium/middlewares.py +++ b/scrapy_selenium/middlewares.py @@ -13,7 +13,8 @@ class SeleniumMiddleware: """Scrapy middleware handling the requests using selenium""" - def __init__(self, driver_name, driver_executable_path, driver_arguments): + def __init__(self, driver_name, driver_executable_path, driver_arguments, + browser_executable_path): """Initialize the selenium webdriver Parameters @@ -24,7 +25,8 @@ def __init__(self, driver_name, driver_executable_path, driver_arguments): The path of the executable binary of the driver driver_arguments: list A list of arguments to initialize the driver - + browser_executable_path: str + The path of the executable binary of the browser """ webdriver_base_path = f'selenium.webdriver.{driver_name}' @@ -36,6 +38,8 @@ def __init__(self, driver_name, driver_executable_path, driver_arguments): driver_options_klass = getattr(driver_options_module, 'Options') driver_options = driver_options_klass() + if browser_executable_path: + driver_options.binary_location = browser_executable_path for argument in driver_arguments: driver_options.add_argument(argument) @@ -52,6 +56,7 @@ def from_crawler(cls, crawler): driver_name = 
crawler.settings.get('SELENIUM_DRIVER_NAME') driver_executable_path = crawler.settings.get('SELENIUM_DRIVER_EXECUTABLE_PATH') + browser_executable_path = crawler.settings.get('SELENIUM_BROWSER_EXECUTABLE_PATH') driver_arguments = crawler.settings.get('SELENIUM_DRIVER_ARGUMENTS') if not driver_name or not driver_executable_path: @@ -62,7 +67,8 @@ def from_crawler(cls, crawler): middleware = cls( driver_name=driver_name, driver_executable_path=driver_executable_path, - driver_arguments=driver_arguments + driver_arguments=driver_arguments, + browser_executable_path=browser_executable_path ) crawler.signals.connect(middleware.spider_closed, signals.spider_closed) From aa34ef3e96b8247a518f99eec259e284ce6ca719 Mon Sep 17 00:00:00 2001 From: Clement Denoix Date: Thu, 3 Jan 2019 21:46:05 +0100 Subject: [PATCH 3/3] Bump the version number to 0.0.6 --- setup.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index 284a9b2..220464a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = scrapy-selenium -version = 0.0.5 +version = 0.0.6 url = https://github.com/clemfromspace/scrapy-selenium licence = DO WHAT THE FUCK YOU WANT TO PUBLIC LICENSE description = Scrapy with selenium