From 199ad701e366f2c24fa8b03bafe223a537aa52ee Mon Sep 17 00:00:00 2001
From: Marcel Koch
Date: Wed, 27 Nov 2024 16:46:30 +0100
Subject: [PATCH] add slides for session 2024-11-20

---
 2024-11-20.md | 281 +++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 235 insertions(+), 46 deletions(-)

diff --git a/2024-11-20.md b/2024-11-20.md
index 01b8490..09af69e 100644
--- a/2024-11-20.md
+++ b/2024-11-20.md
@@ -1,70 +1,259 @@
 ---
-title: User Meeting
-subtitle: What's going on?
-author: Marcel Koch
-date: 20.11.2024
-theme: solarized
-document-css: true
-maxwidth: 100%
-linestretch: 5.0
+marp: true
+theme: ginkgo
+headingDivider: 2
 ---

-## Intro
+# Basic Concepts in Ginkgo

-some textaf some textaf
+

-### sub page
+Early-Adopter Session 27.11.2024

-### other sub page
+## Topics

-```js
-let a = 1;
-let b = 2;
-let c = x => 1 + 2 + x;
-c(3);
-```
+- Concept: Executors
+- Concept: LinOps
+- Hello World!
+- Creating Matrices
+- Concept: Factories
+
+## What is Ginkgo?
+Numerical Sparse Linear Algebra Library
+https://github.com/ginkgo-project/ginkgo
+Supports GPUs and manycore architectures

-## What is Ginkgo?
+Focus on Portability and Composability

-- Numerical Sparse Linear Algebra Library https://github.com/ginkgo-project/ginkgo
-- Supports GPUs and Manycore architecture
-- Focus on Portability and Composability
-- Written in modern C++
-- Comprehensive test and benchmark framework
-- Integrations into MFEM, deal.II, OpenFOAM, ...
-- Available on Spack
+Written in modern C++
+Comprehensive test and benchmark framework
+Integrations into MFEM, deal.II, OpenFOAM, ...
+
+Available on Spack

 ## Basic Concepts: Executor

 Represents a single device to run HPC code on:

-Singlecore CPU (Reference)
+- Singlecore CPU (Reference)
+  `auto exec = gko::ReferenceExecutor::create();`
+- Multicore CPU (OpenMP)
+  `auto exec = gko::OmpExecutor::create();`

-`auto exec = gko::ReferenceExecutor::create();`{.cpp}
-- Multicore CPU (OpenMP)
-  ```cpp
-  auto exec = gko::OmpExecutor::create();
-  ```
+## Basic Concepts: Executor
+
+Represents a single device to run HPC code on:
+
 - NVIDIA GPU (CUDA)
-  ```cpp
-  auto host_exec = gko::OmpExecutor();
-  auto exec = gko::CudaExecutor::create(device_id,
-                                        host_exec);
+  ```c++
+  auto host_exec = gko::OmpExecutor::create();
+  auto exec = gko::CudaExecutor::create(device_id, host_exec);
   ```
 - AMD GPU (HIP/ROCm)
-  ```cpp
-  auto host_exec = gko::OmpExecutor();
-  auto exec = gko::HipExecutor::create(device_id,
-                                       host_exec);
+  ```c++
+  auto host_exec = gko::OmpExecutor::create();
+  auto exec = gko::HipExecutor::create(device_id, host_exec);
+  ```
+- Intel GPU (SYCL)
+  ```c++
+  auto host_exec = gko::OmpExecutor::create();
+  auto exec = gko::DpcppExecutor::create(device_id, host_exec);
+  ```
+
+## Basic Concepts: Executor
+
+Responsibilities (example on the next slide):
+
+- Memory allocation
+- Copying data
+- Executing kernels
+
+Passed to other functions, not used directly!
+```c++
+void gko::func(std::shared_ptr<const gko::Executor> exec, ...);
+```
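+
+## Basic Concepts: Executor
+
+A minimal sketch of these responsibilities in action, using `gko::array`
+(`gko::Array` in older releases); the executors, sizes, and values here are
+illustrative:
+
+```c++
+auto host = gko::OmpExecutor::create();
+auto gpu = gko::CudaExecutor::create(0, host);
+// the host executor performs the allocation
+gko::array<double> host_vals(host, 8);
+host_vals.get_data()[0] = 1.0; // direct access is fine on the host
+// constructing with a different executor copies the data to that device
+gko::array<double> gpu_vals(gpu, host_vals);
+```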
+
+## Basic Concepts: LinOp
+
+Linear Operator representing:
+
+- Matrices (Dense, Sparse CSR, COO, ELL, ...)
+  $$y = A\cdot x$$
+- Solvers (Krylov CG, GMRES, ..., Direct, Triangular)
+  $$S = A^{-1}, y = S\cdot x$$
+- Preconditioners (Block-Jacobi, Multigrid, ILU(0), ...)
+  $$M \approx A^{-1}, y = M\cdot x$$
+- Compositions and linear combinations
+  $$ C = A\cdot B, y = C\cdot x $$
+  $$ C = \alpha A + \beta B, y = C\cdot x$$
+- Factorizations (ILU, LU, Cholesky, ...)
+  $$ L\cdot U = A, y = LU \cdot x$$
+
+## Basic Concepts: LinOp
+
+- Functionality (solver example on the next slide):
+  - Matrix (multi-)vector products
+    ```cpp
+    A->apply(x, y); // y = A * x
+    A->apply(alpha, x, beta, y); // y = alpha * A * x + beta * y
+    ```
+- Properties:
+  - Executor
+    - Where is the data stored?
+    - Where are operations executed?
+  - Dimensions $n \times m$
+- Compatibility:
+  - Ensures all inputs are on the operator's executor
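+
+## Basic Concepts: LinOp
+
+Solvers are LinOps too. A minimal sketch, assuming an executor `exec` as on the
+earlier slides and the usual factory/stopping-criteria interface of recent
+Ginkgo releases (the matrix and iteration count are illustrative):
+
+```c++
+// an SPD matrix A, a right-hand side b, and an initial guess x
+std::shared_ptr<gko::LinOp> A = gko::initialize<gko::matrix::Csr<double>>(
+    {{4.0, 1.0}, {1.0, 3.0}}, exec);
+auto b = gko::initialize<gko::matrix::Dense<double>>({1.0, 2.0}, exec);
+auto x = gko::initialize<gko::matrix::Dense<double>>({0.0, 0.0}, exec);
+// generate a CG solver for A and apply it like any other LinOp
+auto solver = gko::solver::Cg<double>::build()
+                  .with_criteria(gko::stop::Iteration::build()
+                                     .with_max_iters(10u)
+                                     .on(exec))
+                  .on(exec)
+                  ->generate(A);
+solver->apply(b, x); // x ~= A^{-1} * b
+```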
+
+## Creating Matrices
+
+Equivalent across all matrix formats:
+
+- From C++ initializer lists (scalars, tiny matrices)
+  `auto mtx = gko::initialize<Mtx>({{0.0, 1.0}, {1.0, 0.0}}, exec);`
+- From MatrixMarket files
+  `auto mtx = gko::read<Mtx>(std::ifstream{"A.mtx"}, exec);`
+- From an existing matrix
+  ```c++
+  auto mtx = other_mtx->clone();           // same executor
+  auto mtx = gko::clone(exec, other_mtx);  // possibly different executor
+  ```
+- From a matrix in a different format
+  ```c++
+  auto mtx = Mtx::create(exec);
+  other_mtx->convert_to(mtx);
+  ```
+
+## Matrix Assembly
+
+Three equivalent ways of specifying matrix values:
+
+- `gko::matrix_data` for sequential assembly (no duplicates)
+  ```c++
+  gko::matrix_data<> data{gko::dim<2>{num_rows, num_cols}};
+  data.nonzeros.emplace_back(row, column, value);
+  mtx->read(data);
+  ```
+- `gko::matrix_assembly_data` for sequential assembly
+  ```c++
+  gko::matrix_assembly_data<> data{gko::dim<2>{num_rows, num_cols}};
+  data.set_value(row, column, value);
+  data.add_value(row, column, value);
+  mtx->read(data);
+  ```
+- `gko::device_matrix_data` for high-performance assembly
+  ```c++
+  gko::device_matrix_data<> data{exec, gko::dim<2>{nrows, ncols}, nnz};
+  run_assembly_kernel(data.get_row_idxs(),
+                      data.get_col_idxs(),
+                      data.get_values()); // runs on device
+  data.sum_duplicates(); // optional, e.g. for FEM edge DoFs
+  mtx->read(data);
   ```
-- Intel GPU/CPU (SYCL)
-  ```cpp
-  auto host_exec = gko::OmpExecutor();
-  auto exec = gko::DpcppExecutor::create(device_id,
-                                         host_exec);
-  ```
\ No newline at end of file
+
+## Hello World
+
+```c++
+#include <ginkgo/ginkgo.hpp>
+#include <iostream>
+
+int main() {
+    using Mtx = gko::matrix::Csr<double>;
+    using Vec = gko::matrix::Dense<double>;
+    auto exec = gko::ReferenceExecutor::create();
+    auto mtx = gko::initialize<Mtx>({{0.0, 1.0}, {1.0, 0.0}}, exec);
+    auto vec = gko::initialize<Vec>({2.0, 3.0}, exec);
+    auto result = vec->clone();
+    mtx->apply(vec, result);
+    gko::write(std::cout, result);
+}
+```
+
+## Changing Executors
+
+```c++
+#include <ginkgo/ginkgo.hpp>
+#include <iostream>
+
+int main() {
+    using Mtx = gko::matrix::Csr<double>;
+    using Vec = gko::matrix::Dense<double>;
+    auto exec = gko::CudaExecutor::create(0, gko::OmpExecutor::create());
+    auto mtx = gko::initialize<Mtx>({{0.0, 1.0}, {1.0, 0.0}}, exec);
+    auto vec = gko::initialize<Vec>({2.0, 3.0}, exec);
+    auto result = vec->clone();
+    mtx->apply(vec, result);
+    gko::write(std::cout, result);
+}
+```
+
+## Explicit Types
+
+```c++
+#include <ginkgo/ginkgo.hpp>
+#include <iostream>
+
+int main() {
+    using Mtx = gko::matrix::Csr<double>;
+    using Vec = gko::matrix::Dense<double>;
+    std::shared_ptr<gko::Executor> exec =
+        gko::CudaExecutor::create(0, gko::OmpExecutor::create());
+    std::unique_ptr<Mtx> mtx = gko::initialize<Mtx>({{0.0, 1.0}, {1.0, 0.0}},
+                                                    exec);
+    std::unique_ptr<Vec> vec = gko::initialize<Vec>({2.0, 3.0}, exec);
+    std::unique_ptr<Vec> result = vec->clone();
+    mtx->apply(vec, result);
+    gko::write(std::cout, result);
+}
+```
+
+## Matrix Assembly
+
+```c++
+#include <ginkgo/ginkgo.hpp>
+#include <iostream>
+
+int main() {
+    using Mtx = gko::matrix::Csr<double>;
+    using Vec = gko::matrix::Dense<double>;
+    using dim = gko::dim<2>;
+    using matrix_data = gko::matrix_data<double>;
+    auto exec = gko::ReferenceExecutor::create();
+    matrix_data data{dim{10, 10}};
+    auto mtx = Mtx::create(exec);
+    auto x = Vec::create(exec, dim{10, 1});
+    auto y = Vec::create(exec, dim{10, 1});
+    for (int row = 0; row < 10; row++) {
+        data.nonzeros.emplace_back(row, row, 2.0);
+        data.nonzeros.emplace_back(row, (row + 1) % 10, -1.0);
+        data.nonzeros.emplace_back(row, (row + 9) % 10, -1.0);
+        x->at(row, 0) = 1.0; // only works on CPU executors (see the next slide)
+    }
+    mtx->read(data);
+    mtx->apply(x, y);
+    gko::write(std::cout, y);
+}
+```
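+
+## Matrix Assembly
+
+`at()` only works when the data lives on a CPU executor. One option (a sketch
+using the type aliases from the previous slide; the CUDA executor is
+illustrative) is to assemble on a host executor and copy to the device once:
+
+```c++
+auto host = gko::ReferenceExecutor::create();
+auto gpu = gko::CudaExecutor::create(0, host);
+// fill the vector on the host ...
+auto host_x = Vec::create(host, dim{10, 1});
+for (int row = 0; row < 10; row++) {
+    host_x->at(row, 0) = 1.0; // fine: host_x lives on the host
+}
+// ... then move it to the device in a single copy
+auto x = gko::clone(gpu, host_x);
+// read() fills a matrix on any executor; device_matrix_data
+// avoids the host detour entirely
+auto mtx = Mtx::create(gpu);
+mtx->read(data);
+```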