dynamicslab · cl126162 · Apr 10, 2026
diff --git a/examples/nek/.gitignore b/examples/nek/.gitignore
@@ -0,0 +1,9 @@
+# Runtime log and output files
+log.run
+log.test
+log.train
+getting_started/*/train_run/
+*.f[0-9]*
+*.npz
+core.*
+SESSION.NAME
diff --git a/examples/nek/getting_started/1_nekenv_single/run_nekenv_docker.sh b/examples/nek/getting_started/1_nekenv_single/run_nekenv_docker.sh
@@ -42,9 +42,10 @@ if [ "$MODE" == "train" ]; then
         --work-dir "$WORK_DIR" \
         --cache-dir "$HOME/.cache/hydrogym"
 
+
     cd "$WORK_DIR" || exit 1
 
-    mpirun \
+    mpirun --use-hwthread-cpus\
         -np 1 python ../train_sb3_nek_direct.py \
             --env "$ENV_NAME" \
             --local-dir "$LOCAL_DIR" \
@@ -63,11 +64,12 @@ else
         --local-dir "$LOCAL_DIR" \
         --env "$ENV_NAME" \
         --work-dir "$WORK_DIR" \
-        --cache-dir "$HOME/.cache/hydrogym"
+        --cache-dir "$HOME/.cache/hydrogym" \
+        --restart-index 1
 
     cd "$WORK_DIR" || exit 1
 
-    mpirun \
+    mpirun --use-hwthread-cpus\
         -np 1 python ../test_nek_direct.py \
             --env "$ENV_NAME" \
             --local-dir "$LOCAL_DIR" \

diff --git a/examples/nek/getting_started/1_nekenv_single/test_nek_direct.py b/examples/nek/getting_started/1_nekenv_single/test_nek_direct.py
@@ -25,9 +25,10 @@
     - Zero control: action = 0 (baseline test)
 """
 
-import sys
 import argparse
+import sys
 from pathlib import Path
+
 import numpy as np
 
 from hydrogym.nek import NekEnv
@@ -58,6 +59,13 @@ def main():
     # Direct instantiation
     env = NekEnv(env_config=env_config)
 
+    # Modify the par file to ensure the simulation configuration is correct
+    from hydrogym.nek.nek_lib.nek_utils import NEK_INIT
+
+    nek_init = NEK_INIT(nek=env.conf.simulation, drl=env.conf.runner, rank_folder=env.run_folder)
+    nek_init.rewrite_REA_v19()  # Rewrite the par file, v19 corresponds to the new Nek5000 format
+    # The simulation will be reset afterwards, so the par file must be written out at this point
+
     print("\nEnvironment info:")
     print("=" * 80)
     print(f"  Observation space: {env.observation_space.shape}")
@@ -75,17 +83,17 @@ def main():
     print(f"\nRunning {max_steps} steps with zero control...")
 
     total_reward = 0.0
-    action_dim = env.action_space.shape[0]
+    # action_dim = env.action_space.shape[0] --> uncomment when needed
 
     for step in range(max_steps):
         # Define action (example: zero control - baseline)
-        action = np.zeros(action_dim, dtype=np.float32)
+        # action = np.zeros(action_dim, dtype=np.float32)
 
         # OR use constant blowing:
         # action = np.ones(action_dim, dtype=np.float32) * 0.01
 
-        # OR use opposition control:
-        # action = -obs[:action_dim]
+        # Opposition control on the wall-normal velocity: obs is staggered, so take every second entry from index 1
+        action = -obs[1::2]
 
         # Step environment
         obs, reward, terminated, truncated, info = env.step(action)

diff --git a/examples/nek/getting_started/1_nekenv_single/train_sb3_nek_direct.py b/examples/nek/getting_started/1_nekenv_single/train_sb3_nek_direct.py
@@ -4,10 +4,10 @@
 Includes Monitor, DummyVecEnv, TensorBoard, and VecNormalize best practices.
 """
 
-import sys
 import argparse
-from pathlib import Path
+import sys
 from datetime import datetime
+from pathlib import Path
 
 from hydrogym.nek import NekEnv
 
@@ -28,9 +28,9 @@ def train_single_agent(args):
 
     # Import SB3 components
     try:
+        from stable_baselines3.common.callbacks import CheckpointCallback
         from stable_baselines3.common.monitor import Monitor
         from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
-        from stable_baselines3.common.callbacks import CheckpointCallback
 
         if args.algo == "PPO":
             from stable_baselines3 import PPO as Algorithm
@@ -53,6 +53,12 @@ def make_env():
             "configuration_file": args.config_file,
         }
         env = NekEnv(env_config=env_config)
+
+        # Modify the par file to ensure the simulation configuration is correct before training
+        from hydrogym.nek.nek_lib.nek_utils import NEK_INIT
+
+        nek_init = NEK_INIT(nek=env.conf.simulation, drl=env.conf.runner, rank_folder=env.run_folder)
+        nek_init.rewrite_REA_v19()  # Rewrite the par file, v19 corresponds to the new Nek5000 format
         env = Monitor(env)  # CRITICAL: Enables episode reward/length logging
         return env
 
@@ -61,6 +67,8 @@ def make_env():
 
     # 3. Apply VecNormalize (Crucial for Fluid Dynamics)
     # This scales inputs to mean 0, std 1 so the Neural Net learns faster.
+    # NOTE: VecNormalize is not used in the literature, so it is not guaranteed to work here.
+    # See the multi-agent (MARL) setup for more details.
     env = VecNormalize(env, norm_obs=True, norm_reward=True, clip_obs=10.0)
 
     print("Environment created (Wrapped in Monitor, DummyVecEnv, VecNormalize):")

diff --git a/examples/nek/getting_started/2_parallel_env/run_parallel_docker.sh b/examples/nek/getting_started/2_parallel_env/run_parallel_docker.sh
@@ -22,7 +22,7 @@ WORK_DIR="./train_run"
 LOCAL_DIR="/workspace/hydrogym/packaged_envs"
 ENV_NAME="TCFmini_3D_Re180"
 NPROC_NEK=10
-NUM_STEPS=100
+NUM_STEPS=1000
 TOTAL_TIMESTEPS=50000
 MODE="${1:-test}"  # test or train
 
@@ -44,7 +44,7 @@ if [ "$MODE" == "train" ]; then
 
     cd "$WORK_DIR" || exit 1
 
-    mpirun \
+    mpirun --use-hwthread-cpus\
         -np 1 python ../train_sb3_parallel.py \
             --env "$ENV_NAME" \
             --local-dir "$LOCAL_DIR" \
@@ -66,7 +66,7 @@ else
 
     cd "$WORK_DIR" || exit 1
 
-    mpirun \
+    mpirun --use-hwthread-cpus\
         -np 1 python ../test_nek_parallel.py \
             --env "$ENV_NAME" \
             --local-dir "$LOCAL_DIR" \

diff --git a/examples/nek/getting_started/2_parallel_env/test_nek_parallel.py b/examples/nek/getting_started/2_parallel_env/test_nek_parallel.py
@@ -23,12 +23,14 @@
     - Each agent receives observations from sensors near its actuator
 """
 
-import sys
 import argparse
+import sys
 from pathlib import Path
+
 import numpy as np
 
 from hydrogym.nek import NekEnv
+from hydrogym.nek.nek_lib.nek_utils import NEK_INIT
 from hydrogym.nek.parallel_env import NekParallelEnv
 
 
@@ -56,6 +58,10 @@ def main():
     }
 
     base_env = NekEnv(env_config=env_config)
+    nek_init = NEK_INIT(nek=base_env.conf.simulation, drl=base_env.conf.runner, rank_folder=base_env.run_folder)
+
+    # Rewrite the par file, v19 corresponds to the new Nek5000 format
+    nek_init.rewrite_REA_v19()
 
     # Wrap with parallel multi-agent environment
     env = NekParallelEnv(base_env)
@@ -98,8 +104,8 @@ def main():
         # Strategy 2: Uniform blowing (uncomment to test)
         # actions = {agent: np.ones(1, dtype=np.float32) * 0.01 for agent in env.agents}
 
-        # Strategy 3: Opposition control per agent (uncomment to test)
-        # actions = {agent: -obs_dict[agent][:1] for agent in env.agents}
+        # Strategy 3: Opposition control per agent, opposing the wall-normal velocity in this case
+        actions = {agent: -obs_dict[agent][1:] for agent in env.agents}
 
         # Step environment
         obs_dict, rewards_dict, terminated_dict, truncated_dict, infos_dict = env.step(actions)

diff --git a/examples/nek/getting_started/2_parallel_env/train_sb3_parallel.py b/examples/nek/getting_started/2_parallel_env/train_sb3_parallel.py
@@ -10,14 +10,16 @@
 For production, see chapter 3 (PettingZoo + SuperSuit).
 """
 
-import sys
 import argparse
-from pathlib import Path
+import sys
 from datetime import datetime
-import numpy as np
+from pathlib import Path
+
 import gymnasium as gym
+import numpy as np
 
 from hydrogym.nek import NekEnv, NekParallelEnv
+from hydrogym.nek.nek_lib.nek_utils import NEK_INIT
 
 
 class CentralizedParallelWrapper(gym.Env):
@@ -127,6 +129,11 @@ def train_parallel_centralized(args):
     }
     base_env = NekEnv(env_config=env_config)
 
+    # Rewrite the parameter file so that the simulation configuration is correct and
+    # complies with the v19 Nek5000 format
+    nek_init = NEK_INIT(nek=base_env.conf.simulation, drl=base_env.conf.runner, rank_folder=base_env.run_folder)
+    nek_init.rewrite_REA_v19()
+
     # Wrap with parallel interface (dict-based)
     print("Wrapping with NekParallelEnv (dict-based)...")
     parallel_env = NekParallelEnv(base_env)
@@ -143,9 +150,9 @@ def train_parallel_centralized(args):
 
     # Import SB3
     try:
+        from stable_baselines3.common.callbacks import CheckpointCallback
         from stable_baselines3.common.monitor import Monitor
         from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
-        from stable_baselines3.common.callbacks import CheckpointCallback
 
         if args.algo == "PPO":
             from stable_baselines3 import PPO as Algorithm

diff --git a/examples/nek/getting_started/3_pettingzoo/README.md b/examples/nek/getting_started/3_pettingzoo/README.md
@@ -38,6 +38,46 @@ mpirun -np 1 python test_nek_pettingzoo.py --steps 100 : -np 10 nek5000
 mpirun -np 1 python train_sb3_pettingzoo.py --env MiniChannel_Re180 --algo PPO --total-timesteps 100000 : -np 10 nek5000
 ```
 
+## Configuration-Driven Tutorial (Recommended for Reproducibility)
+
+Use a fixed YAML config file to lock the simulation and runner settings across runs.
+
+### 1) Prepare a workspace
+```bash
+python ../prepare_workspace.py \
+  --local-dir ../../../packaged_envs \
+  --env TCFmini_3D_Re180 \
+  --work-dir ./train_run
+```
+
+### 2) Train with a config file
+```bash
+cd train_run
+mpirun -np 1 python ../train_sb3_pettingzoo.py \
+  --env TCFmini_3D_Re180 \
+  --nproc 10 \
+  --config-file ../configs/pettingzoo_tcfmini_re180.yml \
+  --algo TD3 \
+  --total-timesteps 5000000 \
+  : -np 10 nek5000
+```
+
+### 3) Evaluate (PettingZoo rollouts)
+```bash
+cd train_run
+mpirun -np 1 python ../test_nek_pettingzoo.py \
+  --env TCFmini_3D_Re180 \
+  --nproc 10 \
+  --config-file ../configs/pettingzoo_tcfmini_re180.yml \
+  --steps 2500 \
+  : -np 10 nek5000
+```
+
+Notes:
+- The config lives in `examples/nek/configs/pettingzoo_tcfmini_re180.yml`.
+- Run the commands from inside the workspace (`train_run`) so that `compile_path: "."` resolves to the case files.
+- Ensure `--nproc` matches `simulation.nproc` in the config.
+
 ## When to Use
 
 - **Production SB3 training** on multi-agent environments

diff --git a/examples/nek/getting_started/3_pettingzoo/run_pettingzoo_docker.sh b/examples/nek/getting_started/3_pettingzoo/run_pettingzoo_docker.sh
@@ -1,6 +1,7 @@
 #!/usr/bin/env bash
 #
 # Run NEK5000 PettingZoo tests with MPMD coupling.
+# Config is loaded automatically from HuggingFace (environment_config.yaml).
 #
 # Usage:
 #     ./run_pettingzoo_docker.sh                    # Test only
@@ -44,7 +45,7 @@ if [ "$MODE" == "train" ]; then
 
     cd "$WORK_DIR" || exit 1
 
-    mpirun \
+    mpirun --use-hwthread-cpus\
         -np 1 python ../train_sb3_pettingzoo.py \
             --env "$ENV_NAME" \
             --local-dir "$LOCAL_DIR" \
@@ -66,7 +67,7 @@ else
 
     cd "$WORK_DIR" || exit 1
 
-    mpirun \
+    mpirun --use-hwthread-cpus\
         -np 1 python ../test_nek_pettingzoo.py \
             --env "$ENV_NAME" \
             --local-dir "$LOCAL_DIR" \

diff --git a/examples/nek/getting_started/3_pettingzoo/test_nek_pettingzoo.py b/examples/nek/getting_started/3_pettingzoo/test_nek_pettingzoo.py
@@ -23,9 +23,10 @@
     - Actions and observations are dicts with agent names as keys
 """
 
-import sys
 import argparse
+import sys
 from pathlib import Path
+
 import numpy as np
 
 from hydrogym.nek import NekEnv
@@ -115,6 +116,8 @@ def main():
         # Strategy 3: Cooperative strategy - all agents use same signal
         # signal = np.sin(step * 0.1) * 0.01
         # actions = {agent: np.array([signal], dtype=np.float32) for agent in env.agents}
+        # Opposition control strategy: oppose the wall-normal velocity (factor of -1)
+        actions = {agent: -1.0 * obs_dict[agent][:-1] for agent in env.agents}
 
         # Step environment
         obs_dict, rewards_dict, terminated_dict, truncated_dict, infos_dict = env.step(actions)