Skip to content

Commit 4c40328

Browse files
committed
Partial first, derivative at concept
1 parent 917c4da commit 4c40328

File tree

4 files changed

+35
-15
lines changed

4 files changed

+35
-15
lines changed

src/diffKernel.jl

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,24 +2,36 @@ import ForwardDiff as FD
22
import LinearAlgebra as LA
33
using KernelFunctions: SimpleKernel, Kernel
44

5+
const DiffPt = Tuple{Partial,Vararg} # allow for one dimensional and MultiOutput kernels
56
"""
6-
diffKernelCall(k::T, (x,px)::DiffPt, (y,py)::DiffPt) where {Dim, T<:Kernel}
7+
diffKernelCall(k::T, (px,x)::DiffPt, (py, y)::DiffPt) where {Dim, T<:Kernel}
78
89
specialization for DiffPt. Unboxes the partial instructions from DiffPt and
910
applies them to k, evaluates them at the positions of DiffPt
1011
"""
11-
function diffKernelCall(k::T, (x,px)::DiffPt, (y,py)::DiffPt) where {T<:Kernel}
12+
function diffKernelCall(k::T, (px, x)::Tuple{Partial,Pos1}, (py, y)::Tuple{Partial,Pos2}) where {T<:Kernel,Pos1,Pos2}
13+
# need Pos1 and Pos2 because k(1,1.) is allowed (combination of Int and Float) maybe there is a better solution resulting in more type safety?
1214
return apply_partial(k, px.indices, py.indices)(x, y)
1315
end
16+
"""
17+
Multi Kernel Version (do not try to take the derivative with regard to out indices)
18+
"""
19+
function diffKernelCall(
20+
k::T,
21+
(px, x, x_out)::Tuple{Partial,Pos1,Idx1},
22+
(py, y, y_out)::Tuple{Partial,Pos2,Idx2}
23+
) where {T<:MOKernel,Pos1,Idx1,Pos2,Idx2}
24+
return apply_partial((x, y) -> k((x, x_out), (y, y_out)), px.indices, py.indices)(x, y)
25+
end
1426

1527
"""
1628
EnableDiff
1729
1830
A thin wrapper around Kernels enabling the machinery which allows you to
19-
input (x, ∂ᵢ), (y, ∂ⱼ) where ∂ᵢ, ∂ⱼ are of `Partial` type (see [partial](@ref)) in order
31+
input (∂ᵢ, x), (∂ⱼ, y) where ∂ᵢ, ∂ⱼ are of `Partial` type (see [partial](@ref)) in order
2032
to calculate
2133
``
22-
k((x, ∂ᵢ), (y,∂ⱼ)) = \\text{Cov}(\\partial_i Z(x), \\partial_j Z(y))
34+
k((∂ᵢ, x), (∂ⱼ, y)) = \\text{Cov}(\\partial_i Z(x), \\partial_j Z(y))
2335
``
2436
for ``Z`` with ``k(x,y) = \\text{Cov}(Z(x), Z(y))``.
2537
@@ -47,7 +59,7 @@ struct EnableDiff{T<:Kernel} <: Kernel
4759
kernel::T
4860
end
4961
(k::EnableDiff)(x::DiffPt, y::DiffPt) = diffKernelCall(k.kernel, x, y)
50-
(k::EnableDiff)(x::DiffPt, y) = diffKernelCall(k.kernel, x,(y, partial()))
51-
(k::EnableDiff)(x, y::DiffPt) = diffKernelCall(k.kernel, (x, partial()), y)
52-
(k::EnableDiff)(x, y) = k.kernel(x,y) # Fall through case
62+
(k::EnableDiff)(x::DiffPt, y) = diffKernelCall(k.kernel, x, (partial(), y))
63+
(k::EnableDiff)(x, y::DiffPt) = diffKernelCall(k.kernel, (partial(), x), y)
64+
(k::EnableDiff)(x, y) = k.kernel(x, y) # Fall through case
5365

src/partial.jl

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ for T in [MIME"text/plain", MIME"text/html"]
4949
end
5050
end
5151

52-
const DiffPt{T} = Tuple{T,Partial}
5352

5453
function fullderivative(::Val{order}, input_indices::AbstractVector{Int}) where {order}
5554
return mappedarray(partial, productArray(ntuple(_ -> input_indices, Val{order}())...))
@@ -65,6 +64,15 @@ gradient(dim::Integer) = fullderivative(Val(1), dim)
6564
hessian(input_indices::AbstractArray) = fullderivative(Val(2), input_indices)
6665
hessian(dim::Integer) = fullderivative(Val(2), dim)
6766

67+
diffAt(::Val{order}, x) where {order} = productArray(Ref(x), _diffAt(Base.IteratorSize(x), Val(order), x))
68+
_diffAt(::Base.HasLength, ::Val{order}, x) where {order} = fullderivative(Val(order), Base.OneTo(length(x)))
69+
_diffAt(::Base.HasShape{1}, ::Val{order}, x) where {order} = fullderivative(Val(order), Base.OneTo(length(x)))
70+
_diffAt(::Base.HasShape, ::Val{order}, x) where {order} = fullderivative(Val(order), CartesianIndices(axes(x)))
71+
72+
gradAt(x) = diffAt(Val(1), x)
73+
grad(f) = x -> f.(gradAt(x)) # for f = rand(::GP), grad(f)(x) should work.
74+
75+
6876
# idea: lazy mappings can be undone (extract original range -> towards a specialization speedup of broadcasting over multiple derivatives using backwardsdiff)
6977
const MappedPartialVec{T} = ReadonlyMappedArray{Partial{1,Tuple{Int}},1,T,typeof(partial)}
7078
function extract_range(p_map::MappedPartialVec{T}) where {T<:AbstractUnitRange{Int}}

test/diffKernel.jl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
k = EnableDiff(MaternKernel())
44
k2 = MaternKernel()
55
@test k(1, 1) == k2(1, 1)
6-
k(1, (1, partial(1, 1))) # Cov(Z(x), ∂₁∂₁Z(y)) where x=1, y=1
7-
k(([1], partial(1)), [2]) # Cov(∂₁Z(x), Z(y)) where x=[1], y=[2]
6+
k(1, (partial(1, 1), 1)) # Cov(Z(x), ∂₁∂₁Z(y)) where x=1, y=1
7+
k((partial(1), [1]), [2]) # Cov(∂₁Z(x), Z(y)) where x=[1], y=[2]
88
end
99

1010
@testset "Sanity Checks with $k1" for k1 in [
@@ -19,19 +19,19 @@
1919
## This fails for Matern and RationalQuadraticKernel
2020
# because its implementation branches on x == y resulting in a zero derivative
2121
# (cf. https://github.com/JuliaGaussianProcesses/KernelFunctions.jl/issues/517)
22-
@test k((x, partial(1)), (x, partial(1))) > 0
22+
@test k((partial(1), x), (partial(1), x)) > 0
2323

2424
# the slope should be positively correlated with a point further down
2525
@test k(
26-
(x, partial(1)), # slope
26+
(partial(1), x), # slope
2727
x + 1e-2, # point further down
2828
) > 0
2929

3030
@testset "Stationary Tests" begin
31-
@test k((x, partial(1)), x) == 0 # expect Cov(∂Z(x) , Z(x)) == 0
31+
@test k((partial(1), x), x) == 0 # expect Cov(∂Z(x) , Z(x)) == 0
3232

3333
@testset "Isotropic Tests" begin
34-
@test k(([1, 2], partial(1)), ([1, 2], partial(2))) == 0 # cross covariance should be zero
34+
@test k((partial(1), [1, 2]), (partial(2), [1, 2])) == 0 # cross covariance should be zero
3535
end
3636
end
3737
end

test/runtests.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
using KernelFunctions: KernelFunctions as KF, MaternKernel, SEKernel, RationalQuadraticKernel
1+
using KernelFunctions: KernelFunctions as KF, MaternKernel, SEKernel, RationalQuadraticKernel, Matern32Kernel, Matern52Kernel
22
using DifferentiableKernelFunctions: DifferentiableKernelFunctions as DKF, EnableDiff, partial
33
using ProductArrays: productArray
44
using Test

0 commit comments

Comments
 (0)