using Lux: Lux, relu, leakyrelu
using LuxCUDA
using CUDA: CUDA, CuArray, CUDABackend
using LuxCore: AbstractLuxLayer
using Random: AbstractRNG
using ComponentArrays: ComponentArray
using KernelAbstractions
using Atomix: @atomic
# FFTW provides the fft/ifft implementations behind the AbstractFFTs API
using FFTW: fft, ifft
using ChainRulesCore: ChainRulesCore, NoTangent

@kernel inbounds = true function convolve_kernel(ffty_r, ffty_im, fft_x, fft_k, ch_x)
    i, j, c, b = @index(Global, NTuple)
    for ci = 1:ch_x
        y = fft_x[i, j, ci, b] * fft_k[c, i, j]
        # Atomix's @atomic does not support complex numbers, so the
        # accumulation is split into real and imaginary parts
        @atomic ffty_r[i, j, c, b] += real(y)
        @atomic ffty_im[i, j, c, b] += imag(y)
    end
end

function convolve(x, k)
    fft_x = fft(x, (1, 2))
    fft_k = fft(k, (2, 3))

    if CUDA.functional() && k isa CuArray
        # TODO: the element type is hardcoded to Float32
        ffty_r = CUDA.zeros(Float32, size(x, 1), size(x, 2), size(k, 1), size(x, 4))
        ffty_im = CUDA.zeros(Float32, size(x, 1), size(x, 2), size(k, 1), size(x, 4))
        backend = CUDABackend()
        workgroupsize = 256
    else
        ffty_r = zeros(Float32, size(x, 1), size(x, 2), size(k, 1), size(x, 4))
        ffty_im = zeros(Float32, size(x, 1), size(x, 2), size(k, 1), size(x, 4))
        backend = CPU()
        workgroupsize = 64
    end

    # Launch the kernel and wait for it to finish before the inverse FFT
    convolve_kernel(backend, workgroupsize)(
        ffty_r,
        ffty_im,
        fft_x,
        fft_k,
        size(x, 3);
        ndrange = size(ffty_r),
    )
    KernelAbstractions.synchronize(backend)

    real(ifft(ComplexF32.(ffty_r, ffty_im), (1, 2)))
end

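# A minimal usage sketch for `convolve` (the 16×16 grid, 2 input channels,
# 3 output channels, and batch size 4 are arbitrary illustration values).
# The kernel's spatial dimensions must match the input's:
#
#   x = rand(Float32, 16, 16, 2, 4)   # (nx, ny, ch_in, batch)
#   k = rand(Float32, 3, 16, 16)      # (ch_out, nx, ny)
#   y = convolve(x, k)                # -> (16, 16, 3, 4)
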
function ChainRulesCore.rrule(::typeof(convolve), x, k)
    # Given Y = X ⊛ K (circular convolution via FFT),
    # the gradients for backpropagation are:
    #
    # 1. Gradient w.r.t. X:
    #    ∂L/∂X = (∂L/∂Y) ⊛ flip(K)
    #    In the Fourier domain: ℱ(∂L/∂X) = ℱ(∂L/∂Y) ⋅ conj(ℱ(K))
    #
    # 2. Gradient w.r.t. K:
    #    ∂L/∂K = flip(X) ⊛ (∂L/∂Y)
    #    In the Fourier domain: ℱ(∂L/∂K) = conj(ℱ(X)) ⋅ ℱ(∂L/∂Y)
    #
    # Here flip(·) denotes a 180-degree rotation (reversal in both spatial
    # dimensions); multiplying by the complex conjugate in the Fourier
    # domain implements convolution with the flipped signal.

    y = convolve(x, k)
    fft_x = fft(x, (1, 2))
    fft_k = fft(k, (2, 3))

    function convolve_pb(y_bar)
        ffty_bar = fft(y_bar, (1, 2))

        if CUDA.functional() && k isa CuArray
            x_bar_re = CUDA.zeros(Float32, size(x))
            x_bar_im = CUDA.zeros(Float32, size(x))
            k_bar_re = CUDA.zeros(Float32, size(k))
            k_bar_im = CUDA.zeros(Float32, size(k))
            backend = CUDABackend()
            workgroupsize = 256
        else
            x_bar_re = zeros(Float32, size(x))
            x_bar_im = zeros(Float32, size(x))
            k_bar_re = zeros(Float32, size(k))
            k_bar_im = zeros(Float32, size(k))
            backend = CPU()
            workgroupsize = 64
        end

        # Launch the adjoint kernel for x
        convolve_adjoint_x_kernel(backend, workgroupsize)(
            x_bar_re,
            x_bar_im,
            ffty_bar,
            fft_k;
            ndrange = size(x),
        )
        # Launch the adjoint kernel for k
        convolve_adjoint_k_kernel(backend, workgroupsize)(
            k_bar_re,
            k_bar_im,
            fft_x,
            ffty_bar,
            size(x, 3);
            ndrange = size(k),
        )
        # Wait for both kernels before the inverse FFTs
        KernelAbstractions.synchronize(backend)

        x_bar = real(ifft(ComplexF32.(x_bar_re, x_bar_im), (1, 2)))
        k_bar = real(ifft(ComplexF32.(k_bar_re, k_bar_im), (2, 3)))

        return NoTangent(), x_bar, k_bar
    end
    return y, convolve_pb
end

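# A sketch of how the rrule above can be exercised (assumes Zygote is
# available, e.g. in a test environment; it is not a dependency of this file):
#
#   using Zygote
#   x = rand(Float32, 8, 8, 2, 3)
#   k = rand(Float32, 2, 8, 8)
#   loss(x, k) = sum(abs2, convolve(x, k))
#   x_bar, k_bar = Zygote.gradient(loss, x, k)   # dispatches to convolve_pb
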
@kernel inbounds = true function convolve_adjoint_x_kernel(
    x_bar_re,
    x_bar_im,
    ffty_bar,
    fft_k,
)
    i, j, ci, b = @index(Global, NTuple)
    for c = 1:size(fft_k, 1)
        # Use the complex conjugate to backprop the convolution
        y = ffty_bar[i, j, c, b] * conj(fft_k[c, i, j])
        @atomic x_bar_re[i, j, ci, b] += real(y)
        @atomic x_bar_im[i, j, ci, b] += imag(y)
    end
end

@kernel inbounds = true function convolve_adjoint_k_kernel(
    k_bar_re,
    k_bar_im,
    fft_x,
    ffty_bar,
    ch_x,
)
    c, i, j = @index(Global, NTuple)
    for b = 1:size(fft_x, 4)
        for ci = 1:ch_x
            y = conj(fft_x[i, j, ci, b]) * ffty_bar[i, j, c, b]
            @atomic k_bar_re[c, i, j] += real(y)
            @atomic k_bar_im[c, i, j] += imag(y)
        end
    end
end

function apply_masked_convolution(y, k, mask)
    # To get the correct kernel we have to reshape + mask + trim.
    # TODO: this is not pretty...
    # ! Zygote does not like reused variable names, which makes this even
    # ! uglier (hence the definitions of k2 and k3). Zygote also wants the
    # ! mask explicitly defined as a vector, so it has to be pulled out of
    # ! the tuple beforehand via mask = masks[i].

    # Apply the mask to the kernel
    k2 = mask_kernel(k, mask)

    # Adjust the kernel size to match the input dimensions
    k3 = trim_kernel(k2, size(y))

    # Apply the convolution
    y = convolve(y, k3)

    return y
end

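# A usage sketch for `apply_masked_convolution` (hypothetical sizes; here
# the mask is a (kx, ky) array that broadcasts over the spatial dimensions
# of the permuted kernel):
#
#   y = rand(Float32, 8, 8, 2, 3)    # (nx, ny, ch, batch)
#   k = rand(Float32, 2, 12, 12)     # oversized kernel, trimmed to 8×8 below
#   mask = rand(Float32, 12, 12)
#   y2 = apply_masked_convolution(y, k, mask)   # -> (8, 8, 2, 3)
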
function trim_kernel(k, sizex)
    xx, xy, _, _ = sizex
    # Trim the kernel to match the input dimensions
    if k isa CuArray
        return CUDA.@allowscalar(k[:, 1:xx, 1:xy])
    else
        return @view k[:, 1:xx, 1:xy]
    end
end

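# For illustration (hypothetical sizes): a (2, 12, 12) kernel trimmed
# against an (8, 8, ch, batch) input keeps only the leading 8×8 block:
#
#   k = rand(Float32, 2, 12, 12)
#   size(trim_kernel(k, (8, 8, 2, 3)))   # -> (2, 8, 8)
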
function ChainRulesCore.rrule(::typeof(trim_kernel), k, sizex)
    y = trim_kernel(k, sizex)
    if k isa CuArray
        k_bar = CUDA.zeros(Float32, size(k))
    else
        k_bar = zeros(Float32, size(k))
    end

    function trim_kernel_pullback(y_bar)
        # Scatter the cotangent back into the untrimmed kernel shape;
        # the trimmed-away region keeps a zero gradient
        k_bar[:, 1:size(y_bar, 2), 1:size(y_bar, 3)] .= y_bar
        return NoTangent(), k_bar, NoTangent()
    end
    return y, trim_kernel_pullback
end

function mask_kernel(k, mask)
    # Bring the kernel to (kx, ky, ch) layout, apply the mask by
    # broadcasting, then restore the (ch, kx, ky) layout
    permutedims(permutedims(k, [2, 3, 1]) .* mask, [3, 1, 2])
end

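# For illustration (hypothetical sizes): the mask is applied in the
# (kx, ky, ch) layout, so a spatial (kx, ky) mask broadcasts across channels:
#
#   k = rand(Float32, 4, 5, 5)       # (ch, kx, ky)
#   mask = rand(Float32, 5, 5)
#   size(mask_kernel(k, mask))       # -> (4, 5, 5)
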
function get_kernel(ks, chrange)
    # Select the kernels for a given channel range
    if ks isa CuArray
        return CUDA.@allowscalar(ks[chrange, :, :])
    else
        return @view(ks[chrange, :, :])
    end
end

function ChainRulesCore.rrule(::typeof(get_kernel), ks, chrange)
    result = get_kernel(ks, chrange)

    function get_kernel_pullback(result_bar)
        # Scatter the cotangent back into the full kernel array; channels
        # outside chrange keep a zero gradient
        if ks isa CuArray
            k_bar = CUDA.zeros(Float32, size(ks))
            k_bar[chrange, :, :] .= CUDA.@allowscalar(result_bar)
        else
            k_bar = zeros(Float32, size(ks))
            k_bar[chrange, :, :] .= result_bar
        end

        return NoTangent(), k_bar, NoTangent()
    end

    return result, get_kernel_pullback
end
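
# A sketch of the pullback's behaviour (assumes Zygote; hypothetical sizes):
# gradients flow only into the selected channel range, the rest stays zero.
#
#   using Zygote
#   ks = rand(Float32, 6, 5, 5)
#   g = Zygote.gradient(ks -> sum(get_kernel(ks, 1:3)), ks)[1]
#   all(iszero, g[4:6, :, :])        # -> true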