11
11
12
12
13
13
@pytest .mark .stability
14
- @pytest .mark .parametrize ("minimum" , (0 , 1 ) )
14
+ @pytest .mark .parametrize ("minimum,threshold " , [ (0 , 240 ), ( 1 , 120 )] )
15
15
@pytest .mark .parametrize (
16
16
"scatter" ,
17
17
(
18
18
False ,
19
19
pytest .param (True , marks = [pytest .mark .xfail (reason = "dask/distributed#6686" )]),
20
20
),
21
21
)
22
- def test_scale_up_on_task_load (minimum , scatter ):
22
+ def test_scale_up_on_task_load (minimum , threshold , scatter ):
23
+ """Tests that adaptive scaling reacts in a reasonable amount of time to
24
+ an increased task load and scales up.
25
+ """
23
26
maximum = 10
24
27
with Cluster (
25
28
name = f"test_adaptive_scaling-{ uuid .uuid4 ().hex } " ,
@@ -47,7 +50,7 @@ def clog(x: int, ev: Event) -> int:
47
50
client .wait_for_workers (n_workers = maximum , timeout = TIMEOUT_THRESHOLD )
48
51
start = time .monotonic ()
49
52
duration = end - start
50
- assert duration < 360
53
+ assert duration < threshold , duration
51
54
assert len (adapt .log ) <= 2
52
55
assert adapt .log [- 1 ][1 ] == {"status" : "up" , "n" : maximum }
53
56
ev_fan_out .set ()
@@ -58,6 +61,10 @@ def clog(x: int, ev: Event) -> int:
58
61
@pytest .mark .stability
59
62
@pytest .mark .parametrize ("minimum" , (0 , 1 ))
60
63
def test_adapt_to_changing_workload (minimum : int ):
64
+ """Tests that adaptive scaling reacts within a reasonable amount of time to
65
+ a varying task load and scales up or down. This also asserts that no recomputation
66
+ is caused by the scaling.
67
+ """
61
68
maximum = 10
62
69
fan_out_size = 100
63
70
with Cluster (
@@ -70,41 +77,58 @@ def test_adapt_to_changing_workload(minimum: int):
70
77
adapt = cluster .adapt (minimum = minimum , maximum = maximum )
71
78
assert len (adapt .log ) == 0
72
79
80
+ @delayed
73
81
def clog (x : int , ev : Event , sem : Semaphore , ** kwargs ) -> int :
74
82
# Ensure that no recomputation happens by decrementing a countdown on a semaphore
75
- acquired = sem .acquire (timeout = 0. 1 )
76
- assert acquired is True
83
+ acquired = sem .acquire (timeout = 1 )
84
+ assert acquired is True , "Could not acquire semaphore, likely recomputation happened."
77
85
ev .wait ()
78
86
return x
79
87
88
+ def workload (
89
+ fan_out_size ,
90
+ ev_fan_out ,
91
+ sem_fan_out ,
92
+ ev_barrier ,
93
+ sem_barrier ,
94
+ ev_final_fan_out ,
95
+ sem_final_fan_out ,
96
+ ):
97
+ fan_out = [
98
+ clog (i , ev = ev_fan_out , sem = sem_fan_out ) for i in range (fan_out_size )
99
+ ]
100
+ barrier = clog (delayed (sum )(fan_out ), ev = ev_barrier , sem = sem_barrier )
101
+ final_fan_out = [
102
+ clog (i , ev = ev_final_fan_out , sem = sem_final_fan_out , barrier = barrier )
103
+ for i in range (fan_out_size )
104
+ ]
105
+ return final_fan_out
106
+
80
107
sem_fan_out = Semaphore (name = "fan-out" , max_leases = fan_out_size )
81
108
ev_fan_out = Event (name = "fan-out" , client = client )
82
-
83
- fut = client .map (
84
- clog , range (fan_out_size ), ev = ev_fan_out , sem = sem_fan_out
85
- )
86
-
87
- fut = client .submit (sum , fut )
88
109
sem_barrier = Semaphore (name = "barrier" , max_leases = 1 )
89
110
ev_barrier = Event (name = "barrier" , client = client )
90
- fut = client .submit (clog , fut , ev = ev_barrier , sem = sem_barrier )
91
-
92
111
sem_final_fan_out = Semaphore (name = "final-fan-out" , max_leases = fan_out_size )
93
112
ev_final_fan_out = Event (name = "final-fan-out" , client = client )
94
- fut = client .map (
95
- clog ,
96
- range (fan_out_size ),
97
- ev = ev_final_fan_out ,
98
- sem = sem_final_fan_out ,
99
- barrier = fut ,
113
+
114
+ fut = client .compute (
115
+ workload (
116
+ fan_out_size = fan_out_size ,
117
+ ev_fan_out = ev_fan_out ,
118
+ sem_fan_out = sem_fan_out ,
119
+ ev_barrier = ev_barrier ,
120
+ sem_barrier = sem_barrier ,
121
+ ev_final_fan_out = ev_final_fan_out ,
122
+ sem_final_fan_out = sem_final_fan_out ,
123
+ )
100
124
)
101
125
102
126
# Scale up to maximum
103
127
start = time .monotonic ()
104
128
client .wait_for_workers (n_workers = maximum , timeout = TIMEOUT_THRESHOLD )
105
129
end = time .monotonic ()
106
130
duration_first_scale_up = end - start
107
- assert duration_first_scale_up < 420
131
+ assert duration_first_scale_up < 120
108
132
assert len (cluster .observed ) == maximum
109
133
assert adapt .log [- 1 ][1 ]["status" ] == "up"
110
134
@@ -117,7 +141,7 @@ def clog(x: int, ev: Event, sem: Semaphore, **kwargs) -> int:
117
141
time .sleep (0.1 )
118
142
end = time .monotonic ()
119
143
duration_first_scale_down = end - start
120
- assert duration_first_scale_down < 420
144
+ assert duration_first_scale_down < 330
121
145
assert len (cluster .observed ) == 1
122
146
assert adapt .log [- 1 ][1 ]["status" ] == "down"
123
147
@@ -127,7 +151,7 @@ def clog(x: int, ev: Event, sem: Semaphore, **kwargs) -> int:
127
151
client .wait_for_workers (n_workers = maximum , timeout = TIMEOUT_THRESHOLD )
128
152
end = time .monotonic ()
129
153
duration_second_scale_up = end - start
130
- assert duration_second_scale_up < 420
154
+ assert duration_second_scale_up < 120
131
155
assert len (cluster .observed ) == maximum
132
156
assert adapt .log [- 1 ][1 ]["status" ] == "up"
133
157
@@ -143,7 +167,7 @@ def clog(x: int, ev: Event, sem: Semaphore, **kwargs) -> int:
143
167
time .sleep (0.1 )
144
168
end = time .monotonic ()
145
169
duration_second_scale_down = end - start
146
- assert duration_second_scale_down < 420
170
+ assert duration_second_scale_down < 330
147
171
assert len (cluster .observed ) == minimum
148
172
assert adapt .log [- 1 ][1 ]["status" ] == "down"
149
173
return (
@@ -160,6 +184,10 @@ def clog(x: int, ev: Event, sem: Semaphore, **kwargs) -> int:
160
184
@pytest .mark .stability
161
185
@pytest .mark .parametrize ("minimum" , (0 , 1 ))
162
186
def test_adapt_to_memory_intensive_workload (minimum ):
187
+ """Tests that adaptive scaling reacts within a reasonable amount of time to a varying task and memory load.
188
+
189
+ Note: This test currently results in spilling and very long runtimes.
190
+ """
163
191
maximum = 10
164
192
with Cluster (
165
193
name = f"test_adaptive_scaling-{ uuid .uuid4 ().hex } " ,
0 commit comments