Test Embedding with the regular layers and remove leftover merge-conflict markers

Dhairya Gandhi · CarloLucibello · commit 0a754741cba6 · 2021-07-09T18:04:01.000+02:00
diff --git a/src/layers/basic.jl b/src/layers/basic.jl
@@ -143,22 +143,13 @@ end
 
 @functor Dense
 
-<<<<<<< HEAD
 function (a::Dense)(x::AbstractVecOrMat)
   W, b, σ = a.weight, a.bias, a.σ
   return σ.(W*x .+ b)
 end
 
 (a::Dense)(x::AbstractArray) = 
   reshape(a(reshape(x, size(x,1), :)), :, size(x)[2:end]...)
-=======
-function (a::Dense)(x::Union{AbstractVector, AbstractMatrix})
-  W, b, σ = a.W, a.b, a.σ
-  return σ.(W*x .+ b)
-end
-
-(a::Dense)(x::AbstractArray) = reshape(a(mat(x)), :, size(x)[2:end]...)
->>>>>>> 017acdf9 (extend to generic arrays; add cuda tests)
 
 function Base.show(io::IO, l::Dense)
   print(io, "Dense(", size(l.weight, 2), ", ", size(l.weight, 1))
@@ -484,4 +475,3 @@ end
 function Base.show(io::IO, m::Embedding)
   print(io, "Embedding($(size(m.weight, 2)), $(size(m.weight, 1)))")
 end
->>>>>>> b22cd2dc (cl/embed)
diff --git a/test/cuda/layers.jl b/test/cuda/layers.jl
@@ -114,6 +114,9 @@ pixelshuffle = [PixelShuffle]
 gpu_gradtest("PixelShuffle 2d", pixelshuffle, rand(Float32, 3, 4, 18, 3), 3)
 gpu_gradtest("PixelShuffle 1d", pixelshuffle, rand(Float32, 3, 18, 3), 3)
 
+embedding = [Embedding]
+gpu_gradtest("Embedding", embedding, rand(1:10, 3), 10, 4)
+
 @testset "function layers" begin
   x = rand(Float32, 3,3)
   gpu_autodiff_test(x -> sum(Flux.normalise(x; dims=1)), x)
@@ -258,18 +261,19 @@ end
       @test gs_cpu[pcpu] ≈ gs_gpu[pgpu]
     end
   end
+end
 
-  @testset "Embedding" begin
-    vocab_size, embed_size = 10, 4
-    m = Embedding(vocab_size, embed_size)
-    x = rand(1:vocab_size, 3)
-    y = m(x)
-    m_g = m |> gpu
-    x_g = x |> gpu
-    y_g = m_g(x_g)
-    @test collect(y_g) == y
-    gs = gradient(() -> sum(tanh.(m(x))), params(m))
-    gs_g = gradient(() -> sum(tanh.(m_g(x_g))), params(m_g))
-    @test collect(gs_g[m_g.weight]) ≈ gs[m.weight]
-  end
+@testset "Embedding" begin
+  vocab_size, embed_size = 10, 4
+  m = Embedding(vocab_size, embed_size)
+  x = rand(1:vocab_size, 3)
+  y = m(x)
+  m_g = m |> gpu
+  x_g = x |> gpu
+  y_g = m_g(x_g)
+  @test collect(y_g) == y
+  gs = gradient(() -> sum(tanh.(m(x))), params(m))
+  gs_g = gradient(() -> sum(tanh.(m_g(x_g))), params(m_g))
+  @test collect(gs_g[m_g.weight]) ≈ gs[m.weight]
 end
+