refactor: clean up implementation and make test stricter

polvalente · polvalente · commit bd246f7b2e57 · 2025-01-13T15:14:46.000-03:00
diff --git a/nx/lib/nx/lin_alg/block_eigh.ex b/nx/lib/nx/lin_alg/block_eigh.ex
@@ -12,65 +12,8 @@ defmodule Nx.LinAlg.BlockEigh do
 
   import Nx.Defn
 
-  defn calc_rot(tl, tr, br) do
-    complex? = tl |> Nx.type() |> Nx.Type.complex?()
-    br = Nx.take_diagonal(br) |> Nx.real()
-    tr = Nx.take_diagonal(tr)
-    tl = Nx.take_diagonal(tl) |> Nx.real()
-
-    {tr, w} =
-      if complex? do
-        abs_tr = Nx.abs(tr)
-        {abs_tr, Nx.select(abs_tr == 0, 1, Nx.conjugate(tr) / abs_tr)}
-      else
-        {tr, 1}
-      end
-
-    z_tr = Nx.equal(tr, 0)
-    s_tr = Nx.select(z_tr, 1, tr)
-    tau = Nx.select(z_tr, 0, (br - tl) / (2 * s_tr))
-
-    t = Nx.sqrt(1 + tau ** 2)
-
-    t = 1 / (tau + Nx.select(tau >= 0, t, -t))
-
-    pred = Nx.abs(tr) <= 1.0e-5 * Nx.min(Nx.abs(br), Nx.abs(tl))
-    t = Nx.select(pred, Nx.tensor(0, type: tl.type), t)
-
-    c = 1.0 / Nx.sqrt(1.0 + t ** 2)
-    s = if complex?, do: Nx.complex(t * c, 0) * w, else: t * c
-
-    rt1 = tl - t * tr
-    rt2 = br + t * tr
-    {rt1, rt2, c, s}
-  end
-
-  defn sq_norm(tl, tr, bl, br) do
-    Nx.sum(Nx.abs(tl) ** 2 + Nx.abs(tr) ** 2 + Nx.abs(bl) ** 2 + Nx.abs(br) ** 2)
-  end
-
-  defn off_norm(tl, tr, bl, br) do
-    {n, _} = Nx.shape(tl)
-    diag = Nx.broadcast(0, {n})
-    o_tl = Nx.put_diagonal(tl, diag)
-    o_br = Nx.put_diagonal(br, diag)
-
-    sq_norm(o_tl, tr, bl, o_br)
-  end
-
-  @doc """
-  Calculates the Frobenius norm and the norm of the off-diagonals from
-  the submatrices. Used to calculate convergeance.
-  """
-  defn norms(tl, tr, bl, br) do
-    frob = sq_norm(tl, tr, bl, br)
-    off = off_norm(tl, tr, bl, br)
-
-    {frob, off}
-  end
-
   defn eigh(matrix, opts \\ []) do
-    opts = keyword!(opts, eps: 1.0e-6, max_iter: 15)
+    opts = keyword!(opts, eps: 1.0e-6, max_iter: 100)
 
     matrix
     |> Nx.revectorize([collapsed_axes: :auto],
@@ -80,17 +23,6 @@ defmodule Nx.LinAlg.BlockEigh do
     |> revectorize_result(matrix)
   end
 
-  deftransformp revectorize_result({eigenvals, eigenvecs}, a) do
-    shape = Nx.shape(a)
-
-    {
-      Nx.revectorize(eigenvals, a.vectorized_axes,
-        target_shape: Tuple.delete_at(shape, tuple_size(shape) - 1)
-      ),
-      Nx.revectorize(eigenvecs, a.vectorized_axes, target_shape: shape)
-    }
-  end
-
   defnp decompose(matrix, opts) do
     {n, _} = Nx.shape(matrix)
 
@@ -105,31 +37,30 @@ defmodule Nx.LinAlg.BlockEigh do
     eps = opts[:eps]
     max_iter = opts[:max_iter]
 
-    out_type = Nx.Type.to_floating(Nx.type(matrix))
-    matrix = Nx.as_type(matrix, out_type)
+    type = Nx.Type.to_floating(Nx.type(matrix))
+    matrix = Nx.as_type(matrix, type)
     {n, _} = Nx.shape(matrix)
     i_n = n - 1
-    # TO-DO: use a deftransform to calculate this without slicing
-    {mid, _} = Nx.shape(matrix[[0..i_n//2, 0..i_n//2]])
+    mid = calculate_mid(i_n)
     i_mid = mid - 1
 
-    {tl, tr, bl, br} =
-      {matrix[[0..i_mid, 0..i_mid]], matrix[[0..i_mid, mid..i_n]], matrix[[mid..i_n, 0..i_mid]],
-       matrix[[mid..i_n, mid..i_n]]}
+    tl = matrix[[0..i_mid, 0..i_mid]]
+    tr = matrix[[0..i_mid, mid..i_n]]
+    bl = matrix[[mid..i_n, 0..i_mid]]
+    br = matrix[[mid..i_n, mid..i_n]]
 
     # Pad if not even
-    {tl, tr, bl, br} =
+    {tr, bl, br} =
       if Nx.remainder(n, 2) == 1 do
         tr = Nx.pad(tr, 0, [{0, 0, 0}, {0, 1, 0}])
         bl = Nx.pad(bl, 0, [{0, 1, 0}, {0, 0, 0}])
         br = Nx.pad(br, 0, [{0, 1, 0}, {0, 1, 0}])
-        {tl, tr, bl, br}
+        {tr, bl, br}
       else
-        {tl, tr, bl, br}
+        {tr, bl, br}
       end
 
     # Initialze tensors to hold eigenvectors
-    type = tl |> Nx.type() |> Nx.Type.to_floating()
     v_tl = v_br = Nx.eye(mid, type: type)
     v_tr = v_bl = Nx.broadcast(Nx.tensor(0, type: type), {mid, mid})
 
@@ -145,7 +76,7 @@ defmodule Nx.LinAlg.BlockEigh do
     # all sub matrices to share the needed values.
     {{tl, br, v_tl, v_tr, v_bl, v_br}, _} =
       while {{tl, br, v_tl, v_tr, v_bl, v_br}, {frob_norm, off_norm, tr, bl, i = 0}},
-            off_norm > Nx.pow(eps, 2) * frob_norm and i < max_iter do
+            off_norm > eps ** 2 * frob_norm and i < max_iter do
         {tl, tr, bl, br, v_tl, v_tr, v_bl, v_br} =
           perform_sweeps(tl, tr, bl, br, v_tl, v_tr, v_bl, v_br, mid, i_n)
 
@@ -180,57 +111,126 @@ defmodule Nx.LinAlg.BlockEigh do
     {w, v}
   end
 
+  deftransformp calculate_mid(i_n) do
+    Range.size(0..i_n//2)
+  end
+
+  defnp calc_rot(tl, tr, br) do
+    complex? = tl |> Nx.type() |> Nx.Type.complex?()
+    br = Nx.take_diagonal(br) |> Nx.real()
+    tr = Nx.take_diagonal(tr)
+    tl = Nx.take_diagonal(tl) |> Nx.real()
+
+    {tr, w} =
+      if complex? do
+        abs_tr = Nx.abs(tr)
+        {abs_tr, Nx.select(abs_tr == 0, 1, Nx.conjugate(tr) / abs_tr)}
+      else
+        {tr, 1}
+      end
+
+    z_tr = Nx.equal(tr, 0)
+    s_tr = Nx.select(z_tr, 1, tr)
+    tau = Nx.select(z_tr, 0, (br - tl) / (2 * s_tr))
+
+    t = Nx.sqrt(1 + tau ** 2)
+
+    t = 1 / (tau + Nx.select(tau >= 0, t, -t))
+
+    pred = Nx.abs(tr) <= 1.0e-5 * Nx.min(Nx.abs(br), Nx.abs(tl))
+    t = Nx.select(pred, Nx.tensor(0, type: tl.type), t)
+
+    c = 1.0 / Nx.sqrt(1.0 + t ** 2)
+    s = if complex?, do: Nx.complex(t * c, 0) * w, else: t * c
+
+    rt1 = tl - t * tr
+    rt2 = br + t * tr
+    {rt1, rt2, c, s}
+  end
+
+  defnp sq_norm(tl, tr, bl, br) do
+    Nx.sum(Nx.abs(tl) ** 2 + Nx.abs(tr) ** 2 + Nx.abs(bl) ** 2 + Nx.abs(br) ** 2)
+  end
+
+  defnp off_norm(tl, tr, bl, br) do
+    {n, _} = Nx.shape(tl)
+    diag = Nx.broadcast(0, {n})
+    o_tl = Nx.put_diagonal(tl, diag)
+    o_br = Nx.put_diagonal(br, diag)
+
+    sq_norm(o_tl, tr, bl, o_br)
+  end
+
+  # Calculates the Frobenius norm and the norm of the off-diagonals from
+  # the submatrices. Used to check convergence.
+  defnp norms(tl, tr, bl, br) do
+    frob = sq_norm(tl, tr, bl, br)
+    off = off_norm(tl, tr, bl, br)
+
+    {frob, off}
+  end
+
+  deftransformp revectorize_result({eigenvals, eigenvecs}, a) do
+    shape = Nx.shape(a)
+
+    {
+      Nx.revectorize(eigenvals, a.vectorized_axes,
+        target_shape: Tuple.delete_at(shape, tuple_size(shape) - 1)
+      ),
+      Nx.revectorize(eigenvecs, a.vectorized_axes, target_shape: shape)
+    }
+  end
+
   defnp perform_sweeps(tl, tr, bl, br, v_tl, v_tr, v_bl, v_br, mid, i_n) do
     while {tl, tr, bl, br, v_tl, v_tr, v_bl, v_br}, _n <- 0..i_n do
       {rt1, rt2, c, s} = calc_rot(tl, tr, br)
       # build row and column vectors for parrelelized rotations
-      c_v = Nx.reshape(c, {mid, 1})
-      s_v = Nx.reshape(s, {mid, 1})
-      c_h = Nx.reshape(c, {1, mid})
-      s_h = Nx.reshape(s, {1, mid})
+      c_v = Nx.new_axis(c, 1)
+      s_v = Nx.new_axis(s, 1)
+      c_h = Nx.new_axis(c, 0)
+      s_h = Nx.new_axis(s, 0)
 
-      s_conj =
+      s_v_conj =
         if Nx.type(s) |> Nx.Type.complex?() do
           Nx.conjugate(s_v)
         else
           s_v
         end
 
+      s_h_conj = Nx.transpose(s_v_conj)
+
+      # Each rotation group below is performed based on the same
+      # tl, bl, tr, br values, so we must do single-expr
+      # assignments (i.e. {tl, tr, bl, br} = ...)
+
       # Rotate rows
       {tl, tr, bl, br} = {
-        tl * c_v - bl * s_conj,
-        tr * c_v - br * s_conj,
+        tl * c_v - bl * s_v_conj,
+        tr * c_v - br * s_v_conj,
         tl * s_v + bl * c_v,
         tr * s_v + br * c_v
       }
 
-      s_conj =
-        if Nx.type(s) |> Nx.Type.complex?() do
-          Nx.conjugate(s_h)
-        else
-          s_h
-        end
-
       # Rotate cols
       {tl, tr, bl, br} = {
         tl * c_h - tr * s_h,
-        tl * s_conj + tr * c_h,
+        tl * s_h_conj + tr * c_h,
         bl * c_h - br * s_h,
-        bl * s_conj + br * c_h
+        bl * s_h_conj + br * c_h
       }
 
       # Store results and permute values across sub matrices
+      zero_diag = Nx.broadcast(0, {mid})
       tl = Nx.put_diagonal(tl, rt1)
-      tr = Nx.put_diagonal(tr, Nx.broadcast(0, {mid}))
-      bl = Nx.put_diagonal(bl, Nx.broadcast(0, {mid}))
+      tr = Nx.put_diagonal(tr, zero_diag)
+      bl = Nx.put_diagonal(bl, zero_diag)
       br = Nx.put_diagonal(br, rt2)
 
       {tl, tr} = permute_cols_in_row(tl, tr)
       {bl, br} = permute_cols_in_row(bl, br)
       {tl, bl} = permute_rows_in_col(tl, bl)
       {tr, br} = permute_rows_in_col(tr, br)
 
-      s_v_conj = if Nx.type(s_v) |> Nx.Type.complex?(), do: Nx.conjugate(s_v), else: s_v
       # Rotate to calc vectors
       {v_tl, v_tr, v_bl, v_br} = {
         v_tl * c_v - v_bl * s_v_conj,
@@ -282,7 +282,7 @@ defmodule Nx.LinAlg.BlockEigh do
     {top_out, bottom_out}
   end
 
-  defn permute_cols_in_row(left, right) do
+  defnp permute_cols_in_row(left, right) do
     {k, _} = Nx.shape(left)
 
     {left_out, right_out} =
diff --git a/nx/test/nx/lin_alg_test.exs b/nx/test/nx/lin_alg_test.exs
@@ -647,7 +647,7 @@ defmodule Nx.LinAlgTest do
               rand = :rand.uniform() * magnitude * 0.1 + magnitude
               rand * sign
             end)
-            |> Nx.tensor(type: :f64)
+            |> Nx.tensor(type: type)
 
           evals_test_diag =
             evals_test
@@ -664,10 +664,10 @@ defmodule Nx.LinAlgTest do
             |> Nx.dot([2], [0], q, [1], [0])
 
           # Eigenvalues and eigenvectors
-          assert {evals, evecs} = Nx.LinAlg.eigh(a, max_iter: 100_000, eps: 1.0e-8)
+          assert {evals, evecs} = Nx.LinAlg.eigh(a, eps: 1.0e-8)
 
           assert_all_close(evals_test, evals[0], atol: 1.0e-1)
-          # assert_all_close(evals_test, evals[1], atol: 1.0e-1)
+          assert_all_close(evals_test, evals[1], atol: 1.0e-1)
 
           evals =
             evals
@@ -679,7 +679,7 @@ defmodule Nx.LinAlgTest do
           evecs_evals = Nx.dot(evecs, [2], [0], evals, [1], [0])
           a_evecs = Nx.dot(evecs_evals, [2], [0], Nx.LinAlg.adjoint(evecs), [1], [0])
 
-          assert_all_close(a, a_evecs, atol: 1.0e-1)
+          assert_all_close(a, a_evecs, atol: 1.0e-8)
           key
       end
     end