@@ -31,6 +31,26 @@ def get_client():
31
31
return patch .object (df , 'get_http_client' , get_client )
32
32
33
33
34
+ def mock_http_client (test_func ):
35
+ """
36
+ Replace the diffing server's HTTP client with an instance of
37
+ ``MockAsyncHttpClient``. On the resulting mock, call `respond_to()` to set
38
+ responses for various URLs:
39
+
40
+ >>> @mock_http_client
41
+ >>> def test_something(client):
42
+ >>> client.respond_to(r'/a', body='hello')
43
+ >>> client.respond_to(r'/b', code=500, body='goodbye')
44
+ >>> self.fetch('/html_source_dmp?a=http://ex.com/a&b=http://ex.com/b')
45
+ """
46
+ def new_test_func (* args ):
47
+ mock = MockAsyncHttpClient ()
48
+ with patch .object (df , 'get_http_client' , return_value = mock ):
49
+ return test_func (* args , mock )
50
+
51
+ return new_test_func
52
+
53
+
34
54
class DiffingServerTestCase (AsyncHTTPTestCase ):
35
55
36
56
def get_app (self ):
@@ -54,8 +74,10 @@ def test_version(self):
54
74
55
75
class DiffingServerLocalHandlingTest (DiffingServerTestCase ):
56
76
57
- def test_one_local (self ):
77
+ @mock_http_client
78
+ def test_one_local (self , client ):
58
79
with tempfile .NamedTemporaryFile () as a :
80
+ client .respond_to (r'example.org' , body = 'Whatever' )
59
81
response = self .fetch ('/identical_bytes?'
60
82
f'a=file://{ a .name } &b=https://example.org' )
61
83
self .assertEqual (response .code , 200 )
@@ -100,89 +122,99 @@ def test_healthcheck(self):
100
122
101
123
class DiffingServerFetchTest (DiffingServerTestCase ):
102
124
103
- def test_pass_headers (self ):
104
- mock = MockAsyncHttpClient ()
105
- with patch .object (df , 'get_http_client' , return_value = mock ):
106
- mock .respond_to (r'/a$' )
107
- mock .respond_to (r'/b$' )
125
+ @mock_http_client
126
+ def test_pass_headers (self , client ):
127
+ client .respond_to (r'/a$' )
128
+ client .respond_to (r'/b$' )
108
129
109
- self .fetch ('/html_source_dmp?'
110
- 'pass_headers=Authorization,%20User-Agent&'
111
- 'a=https://example.org/a&b=https://example.org/b' ,
112
- headers = {'User-Agent' : 'Some Agent' ,
113
- 'Authorization' : 'Bearer xyz' ,
114
- 'Accept' : 'application/json' })
130
+ self .fetch ('/html_source_dmp?'
131
+ 'pass_headers=Authorization,%20User-Agent&'
132
+ 'a=https://example.org/a&b=https://example.org/b' ,
133
+ headers = {'User-Agent' : 'Some Agent' ,
134
+ 'Authorization' : 'Bearer xyz' ,
135
+ 'Accept' : 'application/json' })
115
136
116
- a_headers = mock .requests ['https://example.org/a' ].headers
117
- assert a_headers .get ('User-Agent' ) == 'Some Agent'
118
- assert a_headers .get ('Authorization' ) == 'Bearer xyz'
119
- assert a_headers .get ('Accept' ) != 'application/json'
137
+ a_headers = client .requests ['https://example.org/a' ].headers
138
+ assert a_headers .get ('User-Agent' ) == 'Some Agent'
139
+ assert a_headers .get ('Authorization' ) == 'Bearer xyz'
140
+ assert a_headers .get ('Accept' ) != 'application/json'
120
141
121
- b_headers = mock .requests ['https://example.org/b' ].headers
122
- assert b_headers .get ('User-Agent' ) == 'Some Agent'
123
- assert b_headers .get ('Authorization' ) == 'Bearer xyz'
124
- assert b_headers .get ('Accept' ) != 'application/json'
142
+ b_headers = client .requests ['https://example.org/b' ].headers
143
+ assert b_headers .get ('User-Agent' ) == 'Some Agent'
144
+ assert b_headers .get ('Authorization' ) == 'Bearer xyz'
145
+ assert b_headers .get ('Accept' ) != 'application/json'
125
146
126
147
127
148
class DiffingServerExceptionHandlingTest (DiffingServerTestCase ):
128
149
129
- def test_local_file_disallowed_in_production (self ):
130
- original = os .environ .get ('WEB_MONITORING_APP_ENV' )
131
- os .environ ['WEB_MONITORING_APP_ENV' ] = 'production'
132
- try :
133
- with tempfile .NamedTemporaryFile () as a :
134
- response = self .fetch ('/identical_bytes?'
135
- f'a=file://{ a .name } &b=https://example.org' )
136
- self .assertEqual (response .code , 403 )
137
- finally :
138
- if original is None :
139
- del os .environ ['WEB_MONITORING_APP_ENV' ]
140
- else :
141
- os .environ ['WEB_MONITORING_APP_ENV' ] = original
150
+ @patch .dict (os .environ , {'WEB_MONITORING_APP_ENV' : 'production' })
151
+ @mock_http_client
152
+ def test_local_file_disallowed_in_production (self , client ):
153
+ client .respond_to (r'example\.org' , body = 'Whatever' )
154
+ with tempfile .NamedTemporaryFile () as a :
155
+ response = self .fetch ('/identical_bytes?'
156
+ f'a=file://{ a .name } &b=https://example.org' )
157
+ self .assertEqual (response .code , 403 )
142
158
143
- def test_invalid_url_a_format (self ):
159
+ @mock_http_client
160
+ def test_invalid_url_a_format (self , client ):
161
+ client .respond_to (r'example\.org' , body = 'Whatever' )
144
162
response = self .fetch ('/html_token?format=json&include=all&'
145
163
'a=example.org&b=https://example.org' )
146
164
self .json_check (response )
147
165
self .assertEqual (response .code , 400 )
148
166
self .assertFalse (response .headers .get ('Etag' ))
149
167
150
- def test_invalid_url_b_format (self ):
168
+ @mock_http_client
169
+ def test_invalid_url_b_format (self , client ):
170
+ client .respond_to (r'example\.org' , body = 'Whatever' )
151
171
response = self .fetch ('/html_token?format=json&include=all&'
152
172
'a=https://example.org&b=example.org' )
153
173
self .json_check (response )
154
174
self .assertEqual (response .code , 400 )
155
175
self .assertFalse (response .headers .get ('Etag' ))
156
176
157
- def test_invalid_diffing_method (self ):
177
+ @mock_http_client
178
+ def test_invalid_diffing_method (self , client ):
179
+ client .respond_to (r'example\.org' , body = 'Whatever' )
158
180
response = self .fetch ('/non_existing?format=json&include=all&'
159
181
'a=example.org&b=https://example.org' )
160
182
self .json_check (response )
161
183
self .assertEqual (response .code , 404 )
162
184
self .assertFalse (response .headers .get ('Etag' ))
163
185
164
- def test_missing_url_a (self ):
186
+ @mock_http_client
187
+ def test_missing_url_a (self , client ):
188
+ client .respond_to (r'example\.org' , body = 'Whatever' )
165
189
response = self .fetch ('/html_token?format=json&include=all&'
166
190
'b=https://example.org' )
167
191
self .json_check (response )
168
192
self .assertEqual (response .code , 400 )
169
193
self .assertFalse (response .headers .get ('Etag' ))
170
194
171
- def test_missing_url_b (self ):
195
+ @mock_http_client
196
+ def test_missing_url_b (self , client ):
197
+ client .respond_to (r'example\.org' , body = 'Whatever' )
172
198
response = self .fetch ('/html_token?format=json&include=all&'
173
199
'a=https://example.org' )
174
200
self .json_check (response )
175
201
self .assertEqual (response .code , 400 )
176
202
self .assertFalse (response .headers .get ('Etag' ))
177
203
178
- def test_not_reachable_url_a (self ):
204
+ @mock_http_client
205
+ def test_not_reachable_url_a (self , client ):
206
+ client .respond_to (r'/eeexample\.org' , error = OSError ('Connection error' ))
207
+ client .respond_to (r'/example\.org' , body = 'Whatever' )
179
208
response = self .fetch ('/html_token?format=json&include=all&'
180
209
'a=https://eeexample.org&b=https://example.org' )
181
210
self .json_check (response )
182
211
self .assertEqual (response .code , 502 )
183
212
self .assertFalse (response .headers .get ('Etag' ))
184
213
185
- def test_not_reachable_url_b (self ):
214
+ @mock_http_client
215
+ def test_not_reachable_url_b (self , client ):
216
+ client .respond_to (r'/eeexample\.org' , error = OSError ('Connection error' ))
217
+ client .respond_to (r'/example\.org' , body = 'Whatever' )
186
218
response = self .fetch ('/html_token?format=json&include=all&'
187
219
'a=https://example.org&b=https://eeexample.org' )
188
220
self .json_check (response )
@@ -202,45 +234,50 @@ async def responder(handler):
202
234
assert response .code == 504
203
235
204
236
def test_missing_params_caller_func (self ):
205
- response = self . fetch ('http://example.org/' )
237
+ response = df . MockResponse ('http://example.org/' , 'Whatever ' )
206
238
with self .assertRaises (KeyError ):
207
239
df .caller (mock_diffing_method , response , response )
208
240
209
- def test_a_is_404 (self ):
241
+ @mock_http_client
242
+ def test_a_is_404 (self , client ):
243
+ client .respond_to (r'/404' , code = 404 , body = 'Error 404' )
244
+ client .respond_to (r'/success' )
245
+
210
246
response = self .fetch ('/html_token?format=json&include=all'
211
247
'&a=http://httpstat.us/404'
212
- '&b=https://example.org' )
248
+ '&b=https://example.org/success ' )
213
249
# The error is upstream, but the message should indicate it was a 404.
214
250
self .assertEqual (response .code , 502 )
215
251
assert '404' in json .loads (response .body )['error' ]
216
252
self .assertFalse (response .headers .get ('Etag' ))
217
253
self .json_check (response )
218
254
219
- def test_accepts_errors_from_web_archives (self ):
255
+ @mock_http_client
256
+ def test_accepts_errors_from_web_archives (self , client ):
220
257
"""
221
258
If a page has HTTP status != 2xx but comes from a web archive,
222
259
we proceed with diffing.
223
260
"""
224
- mock = MockAsyncHttpClient ()
225
- with patch .object (df , 'get_http_client' , return_value = mock ):
226
- mock .respond_to (r'/error$' , code = 404 , headers = {'Memento-Datetime' : 'Tue Sep 25 2018 03:38:50' })
227
- mock .respond_to (r'/success$' )
261
+ client .respond_to (r'/error$' , code = 404 , headers = {'Memento-Datetime' : 'Tue Sep 25 2018 03:38:50' })
262
+ client .respond_to (r'/success$' )
228
263
229
- response = self .fetch ('/html_token?format=json&include=all'
230
- '&a=https://archive.org/20180925033850/http://httpstat.us/error'
231
- '&b=https://example.org/success' )
264
+ response = self .fetch ('/html_token?format=json&include=all'
265
+ '&a=https://archive.org/20180925033850/http://httpstat.us/error'
266
+ '&b=https://example.org/success' )
232
267
233
- self .assertEqual (response .code , 200 )
234
- assert 'change_count' in json .loads (response .body )
268
+ self .assertEqual (response .code , 200 )
269
+ assert 'change_count' in json .loads (response .body )
235
270
236
271
@patch ('web_monitoring_diff.server.server.access_control_allow_origin_header' , '*' )
237
- def test_check_cors_headers (self ):
272
+ @mock_http_client
273
+ def test_check_cors_headers (self , client ):
238
274
"""
239
275
Since we have set Access-Control-Allow-Origin: * on app init,
240
276
the response should have a list of HTTP headers required by CORS.
241
277
Access-Control-Allow-Origin value equals request Origin header because
242
278
we use setting `access_control_allow_origin_header='*'`.
243
279
"""
280
+ client .respond_to (r'example\.org' , body = 'Whatever' )
244
281
response = self .fetch ('/html_token?format=json&include=all'
245
282
'&a=https://example.org&b=https://example.org' ,
246
283
headers = {'Accept' : 'application/json' ,
@@ -252,14 +289,16 @@ def test_check_cors_headers(self):
252
289
253
290
@patch ('web_monitoring_diff.server.server.access_control_allow_origin_header' ,
254
291
'http://one.com,http://two.com,http://three.com' )
255
- def test_cors_origin_header (self ):
292
+ @mock_http_client
293
+ def test_cors_origin_header (self , client ):
256
294
"""
257
295
The allowed origins is a list of URLs. If the request has HTTP
258
296
header `Origin` as one of them, the response `Access-Control-Allow-Origin`
259
297
should have the same value. If not, there shouldn't be any such header
260
298
at all.
261
299
This is necessary for CORS requests with credentials to work properly.
262
300
"""
301
+ client .respond_to (r'example\.org' , body = 'Whatever' )
263
302
response = self .fetch ('/html_token?format=json&include=all'
264
303
'&a=https://example.org&b=https://example.org' ,
265
304
headers = {'Accept' : 'application/json' ,
@@ -547,7 +586,7 @@ def __init__(self):
547
586
self .requests = {}
548
587
self .stub_responses = []
549
588
550
- def respond_to (self , matcher , code = 200 , body = '' , headers = {}, ** kwargs ):
589
+ def respond_to (self , matcher , code = 200 , body = '' , headers = {}, error = None , ** kwargs ):
551
590
"""
552
591
Set up a fake HTTP response. If a request is made and no fake response
553
592
set up with `respond_to()` matches it, an error will be raised.
@@ -566,6 +605,8 @@ def respond_to(self, matcher, code=200, body='', headers={}, **kwargs):
566
605
The response body to send back.
567
606
headers : dict, optional
568
607
Any headers to use for the response.
608
+ error : Exception, optional
609
+ If set, raise an exception instead of returning a mock response.
569
610
**kwargs : any, optional
570
611
Additional keyword args to pass to the Tornado Response.
571
612
Reference: http://www.tornadoweb.org/en/stable/httpclient.html#tornado.httpclient.HTTPResponse
@@ -582,11 +623,16 @@ def respond_to(self, matcher, code=200, body='', headers={}, **kwargs):
582
623
'code' : code ,
583
624
'body' : body ,
584
625
'headers' : headers ,
585
- 'extra' : kwargs
626
+ 'extra' : kwargs ,
627
+ 'error' : error
586
628
})
587
629
588
630
def fetch_impl (self , request , callback ):
589
631
stub = self ._find_stub (request )
632
+ if stub ['error' ]:
633
+ callback (HTTPResponse (request , 600 , error = stub ['error' ]))
634
+ return
635
+
590
636
buffer = BytesIO (utf8 (stub ['body' ]))
591
637
headers = HTTPHeaders (stub ['headers' ])
592
638
response = HTTPResponse (request , stub ['code' ], buffer = buffer ,
0 commit comments