-
Notifications
You must be signed in to change notification settings - Fork 35
/
Copy pathtest_utils.jl
294 lines (256 loc) · 9.37 KB
/
test_utils.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
# More test utilities. Can't be included in KernelFunctions because they introduce a number
# of additional deps that we don't want to have in the main package.
# Check parameters of kernels. `trainable`, `params!`, and `params` are taken directly from
# Flux.jl so as to avoid having to depend on Flux at test-time.
# Return the first element of `functor(m)`; `params!` iterates over this to find the
# children of `m`.
function trainable(m)
    return first(functor(m))
end
# Leaf case: a numeric array is pushed onto `p` directly.
function params!(p::Zygote.Params, x::AbstractArray{<:Number}, seen=Zygote.IdSet())
    return push!(p, x)
end

# Generic case: recurse into every element of `trainable(x)`. `seen` records the objects
# that have already been visited so that each one is only expanded once.
function params!(p::Zygote.Params, x, seen=Zygote.IdSet())
    if !(x in seen)
        push!(seen, x)
        foreach(child -> params!(p, child, seen), trainable(x))
    end
    return nothing
end
# Collect the parameters reachable from the given objects into a fresh `Zygote.Params`.
# The varargs tuple `m` is handed to `params!` as a single object.
function params(m...)
    collected = Zygote.Params()
    params!(collected, m)
    return collected
end
# Test that `params(kernel)` and `params(reference)` have the same length and that their
# entries compare equal pairwise, in order.
function test_params(kernel, reference)
    found = params(kernel)
    expected = params(reference)
    @test length(found) == length(expected)
    @test all(pair -> first(pair) == last(pair), zip(found, expected))
end
# AD utilities
# Type to work around some performance issues that can happen on the reverse-pass of Zygote.
# This context doesn't allow any globals. Don't use this if you use globals in your
# programme.
#
# `NoContext` is a field-less subtype of `Zygote.AContext`; the three methods below are the
# only behaviour defined for it here.
struct NoContext <: Zygote.AContext end
# NOTE(review): the parentheses wrap an assignment expression, not a NamedTuple, so this
# method evaluates to `nothing` — confirm that this is what Zygote expects from `cache`.
Zygote.cache(cx::NoContext) = (cache_fields = nothing)
# Always reports that `x` is not present in this context.
Base.haskey(cx::NoContext, x) = false
# Returns the incoming `Δ` unchanged.
Zygote.accum_param(::NoContext, x, Δ) = Δ
# Finite-difference method shared by the `:FiniteDiff` gradient backend below.
# Per FiniteDifferences' `central_fdm(p, q)` signature this should be a 5-point central
# scheme for the first derivative — TODO confirm against the installed version's docs.
const FDM = FiniteDifferences.central_fdm(5, 1)
# Convenience entry point: lift the backend name `s` into `Val(s)` so that the call
# dispatches to the matching `gradient(f, ::Val{...}, args)` method.
function gradient(f, s::Symbol, args)
    return gradient(f, Val(s), args)
end
# Zygote backend. `Zygote.gradient` may yield `nothing` for its first result; in that case
# an explicit zero gradient shaped like `args` is returned instead.
function gradient(f, ::Val{:Zygote}, args)
    g = first(Zygote.gradient(f, args))
    g === nothing || return g
    # To respect the same output as other ADs
    return args isa AbstractArray{<:Real} ? zeros(size(args)) : zeros.(size.(args))
end
# ForwardDiff backend: thin wrapper around `ForwardDiff.gradient`.
gradient(f, ::Val{:ForwardDiff}, args) = ForwardDiff.gradient(f, args)
# ReverseDiff backend: thin wrapper around `ReverseDiff.gradient`.
gradient(f, ::Val{:ReverseDiff}, args) = ReverseDiff.gradient(f, args)
# Finite-difference backend: evaluates `FiniteDifferences.grad` with the shared `FDM`
# method and returns the first element of its result.
gradient(f, ::Val{:FiniteDiff}, args) = first(FiniteDifferences.grad(FDM, f, args))
# Finite differences have nothing to be compared against, so only check that computing
# the gradient emits no warnings.
# NOTE(review): the callers in this file pass the backend as a `Symbol`, which selects the
# `compare_gradient(f, AD::Symbol, args)` method rather than this one.
compare_gradient(f, ::Val{:FiniteDiff}, args) = @test_nowarn gradient(f, :FiniteDiff, args)
# Test that the gradient of `f` at `args` computed by backend `AD` agrees with the
# finite-difference gradient, within `atol = 1e-8` and `rtol = 1e-5`.
function compare_gradient(f, AD::Symbol, args)
    candidate = gradient(f, AD, args)
    baseline = gradient(f, :FiniteDiff, args)
    @test isapprox(candidate, baseline; atol=1e-8, rtol=1e-5)
end
# Scalar reductions of kernel matrices, used as differentiation targets.
# `dim` is forwarded as the `obsdim` keyword.
function testfunction(k, A, B, dim)
    return sum(kernelmatrix(k, A, B; obsdim=dim))
end
function testfunction(k, A, dim)
    return sum(kernelmatrix(k, A; obsdim=dim))
end

# Same reductions applied to `kernelmatrix_diag`.
function testdiagfunction(k, A, dim)
    return sum(kernelmatrix_diag(k, A; obsdim=dim))
end
function testdiagfunction(k, A, B, dim)
    return sum(kernelmatrix_diag(k, A, B; obsdim=dim))
end

# `MOKernel` variants: no `obsdim` keyword is passed.
function testfunction(k::MOKernel, A, B)
    return sum(kernelmatrix(k, A, B))
end
function testfunction(k::MOKernel, A)
    return sum(kernelmatrix(k, A))
end
function testdiagfunction(k::MOKernel, A)
    return sum(kernelmatrix_diag(k, A))
end
function testdiagfunction(k::MOKernel, A, B)
    return sum(kernelmatrix_diag(k, A, B))
end
# Run the AD test-suite for the kernel built by `kernelfunction` (called with `args`
# unless `args === nothing`).
#
# The `:FiniteDiff` suite runs first; the backends listed in `ADs` are only exercised when
# the resulting testset's `anynonpass` flag is not set. `dims` is forwarded to `test_AD`.
function test_ADs(
    kernelfunction, args=nothing; ADs=[:Zygote, :ForwardDiff, :ReverseDiff], dims=[3, 3]
)
    fd_results = test_AD(:FiniteDiff, kernelfunction, args, dims)
    fd_results.anynonpass && return nothing
    foreach(AD -> test_AD(AD, kernelfunction, args, dims), ADs)
    return nothing
end
# Assert via `@inferred` that three things are type-inferable:
#   1. the primal call `f(args...)`,
#   2. the call to `Zygote._pullback(ctx, f, args...)`,
#   3. the resulting pullback applied to the primal output.
# `ctx` is the Zygote context used for both `_pullback` calls.
function check_zygote_type_stability(f, args...; ctx=Zygote.Context())
    @inferred f(args...)
    @inferred Zygote._pullback(ctx, f, args...)
    primal, pullback = Zygote._pullback(ctx, f, args...)
    @inferred pullback(primal)
end
# Run the AD test-suite for a multi-output kernel `k`.
#
# `test_FiniteDiff` runs first; the backends listed in `ADs` are only exercised when the
# resulting testset's `anynonpass` flag is not set. `dims` is forwarded unchanged.
function test_ADs(
    k::MOKernel; ADs=[:Zygote, :ForwardDiff, :ReverseDiff], dims=(in=3, out=2, obs=3)
)
    fd_results = test_FiniteDiff(k, dims)
    fd_results.anynonpass && return nothing
    foreach(AD -> test_AD(AD, k, dims), ADs)
    return nothing
end
# Check that finite-difference gradients of a multi-output kernel `k` can be computed
# without emitting warnings, for single evaluations and for (diagonal) kernel matrices.
#
# `dims.in` is the length of each input vector, `dims.out` the upper bound of the sampled
# output index, and `dims.obs` the number of observations in the matrix tests.
# Returns the `@testset` result so that callers can inspect it.
function test_FiniteDiff(k::MOKernel, dims=(in=3, out=2, obs=3))
    rng = MersenneTwister(42)
    @testset "FiniteDifferences" begin
        ## Testing Kernel Functions
        x = (rand(rng, dims.in), rand(rng, 1:(dims.out)))
        y = (rand(rng, dims.in), rand(rng, 1:(dims.out)))

        @test_nowarn gradient(:FiniteDiff, x[1]) do a
            k((a, x[2]), y)
        end

        ## Testing Kernel Matrices
        A = [(randn(rng, dims.in), rand(rng, 1:(dims.out))) for i in 1:(dims.obs)]
        B = [(randn(rng, dims.in), rand(rng, 1:(dims.out))) for i in 1:(dims.obs)]

        # Split the inputs once into a feature matrix (one column per observation) and the
        # output indices. The closures below rebuild the inputs from the perturbed matrix
        # WITHOUT assigning to `A`/`B`: assigning to a captured variable would rebind it in
        # this scope, so every later test would run on whatever the previous
        # finite-difference evaluation left behind.
        A_features = reduce(hcat, first.(A))
        B_features = reduce(hcat, first.(B))
        A_outputs = last.(A)
        B_outputs = last.(B)

        @test_nowarn gradient(:FiniteDiff, A_features) do a
            testfunction(k, tuple.(eachcol(a), A_outputs))
        end
        @test_nowarn gradient(:FiniteDiff, A_features) do a
            testfunction(k, tuple.(eachcol(a), A_outputs), B)
        end
        @test_nowarn gradient(:FiniteDiff, B_features) do b
            testfunction(k, A, tuple.(eachcol(b), B_outputs))
        end

        @test_nowarn gradient(:FiniteDiff, A_features) do a
            testdiagfunction(k, tuple.(eachcol(a), A_outputs))
        end
        @test_nowarn gradient(:FiniteDiff, A_features) do a
            testdiagfunction(k, tuple.(eachcol(a), A_outputs), B)
        end
        @test_nowarn gradient(:FiniteDiff, B_features) do b
            testdiagfunction(k, A, tuple.(eachcol(b), B_outputs))
        end
    end
end
# Compare gradients computed by backend `AD` against finite differences for the kernel
# produced by `kernelfunction`.
#
# - `args`: hyperparameters passed to `kernelfunction`; when `nothing`, the kernel is built
#   with `kernelfunction()` and every hyperparameter test is skipped.
# - `dims`: `dims[1]` is the length of the vectors used for single evaluations, and
#   `rand(rng, dims...)` gives the matrices used for the kernel-matrix tests.
#
# Returns the `@testset` result.
function test_AD(AD::Symbol, kernelfunction, args=nothing, dims=[3, 3])
    @testset "$(AD)" begin
        has_args = args !== nothing
        k = has_args ? kernelfunction(args) : kernelfunction()
        rng = MersenneTwister(42)

        if k isa SimpleKernel
            @testset "kappa function" begin
                # Differentiate through `exp` so the evaluation points are given on a log
                # scale: one at `log(eps())`, one at the log of a random draw.
                for logd in log.([eps(), rand(rng)])
                    compare_gradient(AD, [logd]) do v
                        kappa(k, exp(v[1]))
                    end
                end
            end
        end

        @testset "kernel evaluations" begin
            x = rand(rng, dims[1])
            y = rand(rng, dims[1])
            @testset "first argument" begin
                compare_gradient(AD, x) do u
                    k(u, y)
                end
            end
            @testset "second argument" begin
                compare_gradient(AD, y) do v
                    k(x, v)
                end
            end
            if has_args
                @testset "hyperparameters" begin
                    compare_gradient(AD, args) do p
                        kernelfunction(p)(x, y)
                    end
                end
            end
        end

        @testset "kernel matrices" begin
            A = rand(rng, dims...)
            B = rand(rng, dims...)
            for dim in 1:2
                # Full kernel matrices: w.r.t. each input, then the hyperparameters.
                compare_gradient(AD, A) do a
                    testfunction(k, a, dim)
                end
                compare_gradient(AD, A) do a
                    testfunction(k, a, B, dim)
                end
                compare_gradient(AD, B) do b
                    testfunction(k, A, b, dim)
                end
                if has_args
                    compare_gradient(AD, args) do p
                        testfunction(kernelfunction(p), A, dim)
                    end
                    compare_gradient(AD, args) do p
                        testfunction(kernelfunction(p), A, B, dim)
                    end
                end

                # Diagonals of the kernel matrices, in the same order.
                compare_gradient(AD, A) do a
                    testdiagfunction(k, a, dim)
                end
                compare_gradient(AD, A) do a
                    testdiagfunction(k, a, B, dim)
                end
                compare_gradient(AD, B) do b
                    testdiagfunction(k, A, b, dim)
                end
                if has_args
                    compare_gradient(AD, args) do p
                        testdiagfunction(kernelfunction(p), A, dim)
                    end
                    compare_gradient(AD, args) do p
                        testdiagfunction(kernelfunction(p), A, B, dim)
                    end
                end
            end
        end
    end
end
# Compare gradients computed by backend `AD` against finite differences for a multi-output
# kernel `k`, for single evaluations and for (diagonal) kernel matrices.
#
# `dims.in` is the length of each input vector, `dims.out` the upper bound of the sampled
# output index, and `dims.obs` the number of observations in the matrix tests.
# Returns the `@testset` result.
function test_AD(AD::Symbol, k::MOKernel, dims=(in=3, out=2, obs=3))
    @testset "$(AD)" begin
        rng = MersenneTwister(42)

        # Testing kernel evaluations
        x = (rand(rng, dims.in), rand(rng, 1:(dims.out)))
        y = (rand(rng, dims.in), rand(rng, 1:(dims.out)))

        compare_gradient(AD, x[1]) do a
            k((a, x[2]), y)
        end
        compare_gradient(AD, y[1]) do b
            k(x, (b, y[2]))
        end

        # Testing kernel matrices
        A = [(randn(rng, dims.in), rand(rng, 1:(dims.out))) for i in 1:(dims.obs)]
        B = [(randn(rng, dims.in), rand(rng, 1:(dims.out))) for i in 1:(dims.obs)]

        # Split the inputs once into a feature matrix (one column per observation) and the
        # output indices. The closures below rebuild the inputs from the differentiated
        # matrix WITHOUT assigning to `A`/`B`: assigning to a captured variable would rebind
        # it in this scope, leaking values from one gradient evaluation (possibly dual or
        # tracked numbers, or finite-difference perturbations) into every later test.
        A_features = reduce(hcat, first.(A))
        B_features = reduce(hcat, first.(B))
        A_outputs = last.(A)
        B_outputs = last.(B)

        compare_gradient(AD, A_features) do a
            testfunction(k, tuple.(eachcol(a), A_outputs))
        end
        compare_gradient(AD, A_features) do a
            testfunction(k, tuple.(eachcol(a), A_outputs), B)
        end
        compare_gradient(AD, B_features) do b
            testfunction(k, A, tuple.(eachcol(b), B_outputs))
        end

        compare_gradient(AD, A_features) do a
            testdiagfunction(k, tuple.(eachcol(a), A_outputs))
        end
        compare_gradient(AD, A_features) do a
            testdiagfunction(k, tuple.(eachcol(a), A_outputs), B)
        end
        compare_gradient(AD, B_features) do b
            testdiagfunction(k, A, tuple.(eachcol(b), B_outputs))
        end
    end
end