Skip to content

Commit 2adc3f0

Browse files
j4n7 authored and clemfromspace committed
Add the script parameter (#29)
Add the 'script' parameter to the SeleniumRequest
1 parent 20ff8ae commit 2adc3f0

File tree

4 files changed

+36
-3
lines changed

4 files changed

+36
-3
lines changed

README.md

+11-2
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ def parse_result(self, response):
5252
```
5353

5454
### Additional arguments
55-
The `scrapy_selenium.SeleniumRequest` accept 3 additional arguments:
55+
The `scrapy_selenium.SeleniumRequest` accepts 4 additional arguments:
5656

5757
#### `wait_time` / `wait_until`
5858

@@ -80,6 +80,15 @@ yield SeleniumRequest(
8080

8181
def parse_result(self, response):
8282
with open('image.png', 'wb') as image_file:
83-
image_file.write(response.meta['screenshot])
83+
image_file.write(response.meta['screenshot'])
8484
```
8585

86+
#### `script`
87+
When used, selenium will execute custom JavaScript code.
88+
```python
89+
yield SeleniumRequest(
90+
url,
91+
self.parse_result,
92+
script='window.scrollTo(0, document.body.scrollHeight);',
93+
)
94+
```

scrapy_selenium/http.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
class SeleniumRequest(Request):
77
"""Scrapy ``Request`` subclass providing additional arguments"""
88

9-
def __init__(self, wait_time=None, wait_until=None, screenshot=False, *args, **kwargs):
9+
def __init__(self, wait_time=None, wait_until=None, screenshot=False, script=None, *args, **kwargs):
1010
"""Initialize a new selenium request
1111
1212
Parameters
@@ -19,11 +19,14 @@ def __init__(self, wait_time=None, wait_until=None, screenshot=False, *args, **k
1919
screenshot: bool
2020
If True, a screenshot of the page will be taken and the data of the screenshot
2121
will be returned in the response "meta" attribute.
22+
script: str
23+
JavaScript code to execute.
2224
2325
"""
2426

2527
self.wait_time = wait_time
2628
self.wait_until = wait_until
2729
self.screenshot = screenshot
30+
self.script = script
2831

2932
super().__init__(*args, **kwargs)

scrapy_selenium/middlewares.py

+3
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,9 @@ def process_request(self, request, spider):
9999
if request.screenshot:
100100
request.meta['screenshot'] = self.driver.get_screenshot_as_png()
101101

102+
if request.script:
103+
self.driver.execute_script(request.script)
104+
102105
body = str.encode(self.driver.page_source)
103106

104107
# Expose the driver via the "meta" attribute

tests/test_middlewares.py

+18
Original file line numberDiff line numberDiff line change
@@ -117,3 +117,21 @@ def test_process_request_should_return_a_screenshot_if_screenshot_option(self):
117117
)
118118

119119
self.assertIsNotNone(html_response.meta['screenshot'])
120+
121+
def test_process_request_should_execute_script_if_script_option(self):
122+
"""Test that the ``process_request`` should execute the script and return a response"""
123+
124+
selenium_request = SeleniumRequest(
125+
url='http://www.python.org',
126+
script='document.title = "scrapy_selenium";'
127+
)
128+
129+
html_response = self.selenium_middleware.process_request(
130+
request=selenium_request,
131+
spider=None
132+
)
133+
134+
self.assertEqual(
135+
html_response.selector.xpath('//title/text()').extract_first(),
136+
'scrapy_selenium'
137+
)

0 commit comments

Comments
 (0)