diff --git a/3.test_cases/megatron/nemo/Dockerfile b/3.test_cases/megatron/nemo/Dockerfile
index e10cbfc57..21d8637cf 100644
--- a/3.test_cases/megatron/nemo/Dockerfile
+++ b/3.test_cases/megatron/nemo/Dockerfile
@@ -1,10 +1,19 @@
-FROM nvcr.io/nvidia/nemo:25.07.00
-ARG GDRCOPY_VERSION=v2.5
-ARG EFA_INSTALLER_VERSION=1.47.0
-# ARG AWS_OFI_NCCL_VERSION=v1.13.2-aws    # OFI NCCL already packaged into EFA installation (/opt/amazon/ofi-nccl) cf. https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/efa-changelog.html
-ARG NCCL_VERSION=v2.27.7-1
-ARG NCCL_TESTS_VERSION=v2.16.9
-ARG TRANSFORMERS_VERSION=4.56.1
+FROM nvcr.io/nvidia/nemo:26.02
+ARG GDRCOPY_VERSION=v2.5.2
+ARG EFA_INSTALLER_VERSION=1.48.0
+# AWS OFI NCCL is bundled into the EFA installation (/opt/amazon/ofi-nccl) for
+# EFA installer >=1.47.0. Cf. https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/efa-changelog.html
+ARG AWS_OFI_NCCL_VERSION=v1.19.0
+ARG NCCL_VERSION=v2.30.4-1
+ARG NCCL_TESTS_VERSION=v2.18.3
+ARG TRANSFORMERS_VERSION=4.57.6
+# Pin megatron-core to the version that NeMo 2.7.x is API-compatible with.
+# nemo:26.02 ships megatron-core 0.16.1 at /opt/Megatron-Bridge/3rdparty/Megatron-LM/
+# but bundled NeMo 2.7.1 calls APIs 0.16.x removed (get_megatron_optimizer
+# kwargs no_weight_decay_cond/scale_lr_cond/lr_mult) and imports submodules
+# 0.16.x dropped (megatron.core.dist_checkpointing.strategies.tensorstore).
+# 0.15.3 is the latest 0.15.x release on PyPI / GitHub.
+ARG MEGATRON_CORE_VERSION=core_v0.15.3
 
 
 ARG OPEN_MPI_PATH=/opt/amazon/openmpi    # Open MPI already packaged into EFA installation (/opt/amazon/openmpi)
@@ -38,8 +47,13 @@ RUN DEBIAN_FRONTEND=noninteractive apt install -y --allow-unauthenticated \
     libtool \
     openssh-client \
     openssh-server \
-    vim \
-    && apt autoremove -y
+    vim
+# NOTE: deliberately no `apt autoremove` here. The nemo:26.02 base image
+# ships several CUDA add-on libraries (libcusparseLt0, libcudnn-frontend,
+# etc.) that apt sees as orphaned because no installed Debian package
+# Depends: them — they're loaded via dlopen by torch / transformer_engine
+# at runtime. autoremove deletes them and `import torch` then crashes with
+# `libcusparseLt.so.0: cannot open shared object file`.
 
 RUN mkdir -p /var/run/sshd && \
     sed -i 's/[ #]\(.*StrictHostKeyChecking \).*/ \1no/g' /etc/ssh/ssh_config && \
@@ -53,8 +67,37 @@ RUN rm -rf /root/.ssh/ \
  && cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys \
  && printf "Host *\n  StrictHostKeyChecking no\n" >> /root/.ssh/config
 
-ENV LD_LIBRARY_PATH=/usr/local/cuda/extras/CUPTI/lib64:/opt/amazon/openmpi/lib:/opt/nccl/build/lib:/opt/amazon/efa/lib:/opt/amazon/ofi-nccl/lib:$LD_LIBRARY_PATH
-ENV PATH=/opt/amazon/openmpi/bin/:/opt/amazon/efa/bin:/usr/bin:/usr/local/bin:$PATH
+# NGC images install the OFI NCCL plugin via the libnccl-ofi-ngc-v2 package
+# from the EFA installer, which lands at /opt/amazon/aws-ofi-nccl/ rather than
+# /opt/amazon/ofi-nccl/ used on stock Ubuntu. Cover both for portability.
+ENV LD_LIBRARY_PATH=/usr/local/cuda/extras/CUPTI/lib64:/opt/amazon/openmpi/lib:/opt/nccl/build/lib:/opt/amazon/efa/lib:/opt/amazon/aws-ofi-nccl/lib:/opt/amazon/ofi-nccl/lib:$LD_LIBRARY_PATH
+# Prepend /opt/venv/bin so every `python`/`pip` resolves to the uv-managed
+# venv where nemo, megatron, nemo_run, etc. live. The nemo:26.02 image's
+# default PATH puts /usr/bin BEFORE /opt/venv/bin, which breaks torchelastic
+# worker spawn: `ft_launcher` finds /usr/bin/python (no nemo_run) instead of
+# /opt/venv/bin/python and crashes with `ModuleNotFoundError: nemo_run`.
+ENV PATH=/opt/venv/bin:/opt/amazon/openmpi/bin/:/opt/amazon/efa/bin:/usr/bin:/usr/local/bin:$PATH
+
+# nemo:26.02 ships libcusparseLt0 (libcusparselt0-cuda-13) into the nested
+# /usr/lib/x86_64-linux-gnu/libcusparseLt/13/ directory but does NOT create
+# the libcusparseLt.so.0 SONAME symlink and does NOT add an ld.so.conf.d
+# entry. torch dlopens libcusparseLt.so.0 at import time and crashes:
+#   ImportError: libcusparseLt.so.0: cannot open shared object file
+# Register the dir with ld.so AND let ldconfig create the SONAME symlink.
+RUN echo /usr/lib/x86_64-linux-gnu/libcusparseLt/13 > /etc/ld.so.conf.d/000_libcusparselt.conf \
+ && ldconfig
+
+# Pin the bundled megatron-core to MEGATRON_CORE_VERSION (see ARG above for
+# rationale). Replace the in-place 0.16.1 source tree at
+# /opt/Megatron-Bridge/3rdparty/Megatron-LM/megatron/core/ with the 0.15.3
+# release. slurm/venv.sh applies the same pin to the host-side venv; this
+# step covers the copy that runs inside the container.
+RUN git clone -b ${MEGATRON_CORE_VERSION} --depth 1 \
+        https://github.com/NVIDIA/Megatron-LM.git /tmp/megatron-lm \
+ && rm -rf /opt/Megatron-Bridge/3rdparty/Megatron-LM/megatron/core \
+ && cp -r /tmp/megatron-lm/megatron/core \
+        /opt/Megatron-Bridge/3rdparty/Megatron-LM/megatron/ \
+ && rm -rf /tmp/megatron-lm
 
 #################################################
 ## Install NVIDIA GDRCopy
@@ -72,6 +115,17 @@ ENV PATH /opt/gdrcopy/bin:$PATH
 
 #################################################
 ## Install EFA installer
+##
+## The base nemo:26.02 image ships partial EFA components (efa-profile,
+## libfabric1-aws, openmpi*-aws, libnccl-ofi-ngc-v2). Their dpkg state
+## doesn't match the on-disk files, so the EFA installer's verify step
+## refuses to upgrade with "ld.so.conf.d/000_efa.conf is installed by
+## efa-profile package but doesn't exist". Purge them first.
+RUN dpkg --purge --force-all \
+    efa-profile libfabric1-aws libfabric1-aws-dbg \
+    openmpi40-aws openmpi50-aws \
+    libnccl-ofi-ngc-v2 libnccl-ofi-ngc-v2-dbgsym 2>/dev/null || true
+
 RUN cd $HOME \
     && curl -O https://efa-installer.amazonaws.com/aws-efa-installer-${EFA_INSTALLER_VERSION}.tar.gz \
     && tar -xf $HOME/aws-efa-installer-${EFA_INSTALLER_VERSION}.tar.gz \
@@ -132,5 +186,9 @@ ENV OMPI_MCA_pml=^cm,ucx            \
 ## Turn off PMIx Error https://github.com/open-mpi/ompi/issues/7516
 ENV PMIX_MCA_gds=hash
 
-# Debug: Verify OFI NCCL and OPENMPI installation
-RUN ls -la /opt/amazon/efa/lib/ && ls -la /opt/amazon/ofi-nccl/lib/ && ls -la /opt/amazon/openmpi/lib/
+# Verify EFA / OFI NCCL / OpenMPI installation. NGC's libnccl-ofi-ngc-v2
+# package installs to /opt/amazon/aws-ofi-nccl, while stock EFA installs
+# to /opt/amazon/ofi-nccl — accept either.
+RUN ls -la /opt/amazon/efa/lib/ \
+ && (ls -la /opt/amazon/aws-ofi-nccl/lib/ || ls -la /opt/amazon/ofi-nccl/lib/) \
+ && ls -la /opt/amazon/openmpi/lib/
diff --git a/3.test_cases/megatron/nemo/kubernetes/Dockerfile b/3.test_cases/megatron/nemo/kubernetes/Dockerfile
index bcb2f9c2b..43d8c6774 100644
--- a/3.test_cases/megatron/nemo/kubernetes/Dockerfile
+++ b/3.test_cases/megatron/nemo/kubernetes/Dockerfile
@@ -1,10 +1,12 @@
-FROM nvcr.io/nvidia/nemo:25.04.01
-ARG GDRCOPY_VERSION=v2.4.1
-ARG EFA_INSTALLER_VERSION=1.37.0
-ARG AWS_OFI_NCCL_VERSION=v1.13.2-aws
-ARG NCCL_VERSION=v2.23.4-1
-ARG NCCL_TESTS_VERSION=v2.13.10
-ARG TRANSFORMERS_VERSION=4.48.1
+FROM nvcr.io/nvidia/nemo:26.02
+ARG GDRCOPY_VERSION=v2.5.2
+ARG EFA_INSTALLER_VERSION=1.48.0
+# AWS OFI NCCL is bundled into the EFA installation (/opt/amazon/ofi-nccl) for
+# EFA installer >=1.47.0, so the plugin is no longer built from source here.
+ARG AWS_OFI_NCCL_VERSION=v1.19.0
+ARG NCCL_VERSION=v2.30.4-1
+ARG NCCL_TESTS_VERSION=v2.18.3
+ARG TRANSFORMERS_VERSION=4.57.6
 
 ARG OPEN_MPI_PATH=/opt/amazon/openmpi
 
@@ -52,7 +54,7 @@ RUN rm -rf /root/.ssh/ \
  && cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys \
  && printf "Host *\n  StrictHostKeyChecking no\n" >> /root/.ssh/config
 
-ENV LD_LIBRARY_PATH=/usr/local/cuda/extras/CUPTI/lib64:/opt/amazon/openmpi/lib:/opt/nccl/build/lib:/opt/amazon/efa/lib:/opt/aws-ofi-nccl/install/lib:$LD_LIBRARY_PATH
+ENV LD_LIBRARY_PATH=/usr/local/cuda/extras/CUPTI/lib64:/opt/amazon/openmpi/lib:/opt/nccl/build/lib:/opt/amazon/efa/lib:/opt/amazon/aws-ofi-nccl/lib:/opt/amazon/ofi-nccl/lib:$LD_LIBRARY_PATH
 ENV PATH=/opt/amazon/openmpi/bin/:/opt/amazon/efa/bin:/usr/bin:/usr/local/bin:$PATH
 
 ######################
@@ -87,25 +89,11 @@ RUN cd $HOME \
 
 
 ###################################################
-## Install AWS-OFI-NCCL plugin
-RUN DEBIAN_FRONTEND=noninteractive apt-get install -y libhwloc-dev
-#Switch from sh to bash to allow parameter expansion
-SHELL ["/bin/bash", "-c"]
-RUN curl -OL https://github.com/aws/aws-ofi-nccl/releases/download/${AWS_OFI_NCCL_VERSION}/aws-ofi-nccl-${AWS_OFI_NCCL_VERSION//v}.tar.gz \
-    && tar -xf aws-ofi-nccl-${AWS_OFI_NCCL_VERSION//v}.tar.gz \
-    && cd aws-ofi-nccl-${AWS_OFI_NCCL_VERSION//v} \
-    && ./configure --prefix=/opt/aws-ofi-nccl/install \
-        --with-mpi=/opt/amazon/openmpi \
-        --with-libfabric=/opt/amazon/efa \
-        --with-cuda=/usr/local/cuda \
-        --enable-platform-aws \
-    && make -j $(nproc) \
-    && make install \
-    && cd .. \
-    && rm -rf aws-ofi-nccl-${AWS_OFI_NCCL_VERSION//v} \
-    && rm aws-ofi-nccl-${AWS_OFI_NCCL_VERSION//v}.tar.gz
-
-SHELL ["/bin/sh", "-c"]
+## AWS OFI NCCL plugin: bundled with the EFA installer (>=1.47.0), so it is
+## no longer built from source. It lands in /opt/amazon/aws-ofi-nccl on NGC
+## base images and /opt/amazon/ofi-nccl otherwise; fail if neither exists.
+RUN ls -la /opt/amazon/aws-ofi-nccl/lib/ || ls -la /opt/amazon/ofi-nccl/lib/ || \
+    (echo "AWS OFI NCCL not found in EFA installation; check EFA_INSTALLER_VERSION" && exit 1)
 
 ###################################################
 RUN rm -rf /var/lib/apt/lists/*
diff --git a/3.test_cases/megatron/nemo/kubernetes/README.md b/3.test_cases/megatron/nemo/kubernetes/README.md
index 156c298d6..01fa661dc 100644
--- a/3.test_cases/megatron/nemo/kubernetes/README.md
+++ b/3.test_cases/megatron/nemo/kubernetes/README.md
@@ -124,7 +124,7 @@ Before you begin, ensure you have the following:
 
 ## 1. Building the AWS-Optimized NeMo Container for EFA Enabled Instances
 
-**If you're not using an EFA enabled instance type, you can skip this step**. Here the base NeMo image (`nvcr.io/nvidia/nemo:25.04.01`) is enhanced with AWS-specific optimizations for EFA support.
+**If you're not using an EFA enabled instance type, you can skip this step**. Here the base NeMo image (`nvcr.io/nvidia/nemo:26.02`) is enhanced with AWS-specific optimizations for EFA support.
 
 ### Build the Docker Image
 
@@ -359,7 +359,7 @@ cd data-processing/
 
 ## 5. Launching NeMo Training Jobs
 
-> **Note**: The AWS-optimized container with EFA support can only be used for EFA enabled instances. For non-EFA usage, the default NeMo container (`nvcr.io/nvidia/nemo:25.04.01`) will work fine and you can omit the `--container_image` parameter.
+> **Note**: The AWS-optimized container with EFA support can only be used for EFA enabled instances. For non-EFA usage, the default NeMo container (`nvcr.io/nvidia/nemo:26.02`) will work fine and you can omit the `--container_image` parameter.
 
 ### Overview
 
@@ -388,7 +388,7 @@ The repository provides multiple training scenarios to meet different needs:
 | `--gpus` | GPU type (e.g., L40S, H100, A10G) | L40S |
 | `--gpu-devices` | Number of GPUs per node | 4 |
 | `--efa-devices` | Number of EFA devices per node | None |
-| `--container_image` | Container image for training (required for using EFA) | nvcr.io/nvidia/nemo:25.04.01 |
+| `--container_image` | Container image for training (required for using EFA) | nvcr.io/nvidia/nemo:26.02 |
 | `--env_vars_file` | JSON file with environment variables | env_vars.json |
 | `--pvc_name` | Name of the Persistent Volume Claim to use | fsx-claim |
 | `--pvc_mount_path` | Path where the PVC should be mounted in the container | /mnt/nemo |
@@ -467,7 +467,7 @@ python pretrain_mock_dataset.py \
     --nodes 1 \
     --gpus L40S \
     --gpu-devices 4 \
-    --container_image nvcr.io/nvidia/nemo:25.04.01 \
+    --container_image nvcr.io/nvidia/nemo:26.02 \
     --env_vars_file env_vars.json \
     --pvc_name fsx-claim \
     --pvc_mount_path /mnt/nemo
@@ -496,7 +496,7 @@ python finetune_default_dataset.py \
     --nodes 1 \
     --gpus L40S \
     --gpu-devices 4 \
-    --container_image nvcr.io/nvidia/nemo:25.04.01 \
+    --container_image nvcr.io/nvidia/nemo:26.02 \
     --env_vars_file env_vars.json \
     --pvc_name fsx-claim \
     --pvc_mount_path /mnt/nemo
@@ -509,7 +509,7 @@ python finetune_default_dataset.py \
     --nodes 1 \
     --gpus L40S \
     --gpu-devices 4 \
-    --container_image nvcr.io/nvidia/nemo:25.04.01 \
+    --container_image nvcr.io/nvidia/nemo:26.02 \
     --env_vars_file env_vars.json \
     --pvc_name fsx-claim \
     --pvc_mount_path /mnt/nemo \
@@ -523,7 +523,7 @@ python finetune_default_dataset.py \
     --nodes 1 \
     --gpus L40S \
     --gpu-devices 4 \
-    --container_image nvcr.io/nvidia/nemo:25.04.01 \
+    --container_image nvcr.io/nvidia/nemo:26.02 \
     --env_vars_file env_vars.json \
     --pvc_name fsx-claim \
     --pvc_mount_path /mnt/nemo \
@@ -554,7 +554,7 @@ python finetune_custom_dataset.py \
     --nodes 1 \
     --gpus L40S \
     --gpu-devices 4 \
-    --container_image nvcr.io/nvidia/nemo:25.04.01 \
+    --container_image nvcr.io/nvidia/nemo:26.02 \
     --env_vars_file env_vars.json \
     --pvc_name fsx-claim \
     --pvc_mount_path /mnt/nemo \
@@ -570,7 +570,7 @@ python finetune_custom_dataset.py \
     --nodes 2 \
     --gpus L40S \
     --gpu-devices 4 \
-    --container_image nvcr.io/nvidia/nemo:25.04.01 \
+    --container_image nvcr.io/nvidia/nemo:26.02 \
     --env_vars_file env_vars.json \
     --pvc_name fsx-claim \
     --pvc_mount_path /mnt/nemo \
diff --git a/3.test_cases/megatron/nemo/kubernetes/data-processing/data-processing-pod-template.yaml b/3.test_cases/megatron/nemo/kubernetes/data-processing/data-processing-pod-template.yaml
index 52e03bb14..79cba7b41 100644
--- a/3.test_cases/megatron/nemo/kubernetes/data-processing/data-processing-pod-template.yaml
+++ b/3.test_cases/megatron/nemo/kubernetes/data-processing/data-processing-pod-template.yaml
@@ -8,7 +8,7 @@ spec:
   restartPolicy: Never
   containers:
   - name: nemo-processing
-    image: nvcr.io/nvidia/nemo:25.04.01
+    image: nvcr.io/nvidia/nemo:26.02
     command: ["/bin/bash"]
     args: ["-c", "sleep infinity"]
     resources:
diff --git a/3.test_cases/megatron/nemo/kubernetes/finetune_custom_dataset.py b/3.test_cases/megatron/nemo/kubernetes/finetune_custom_dataset.py
index 210c7bb57..fcf0052d9 100644
--- a/3.test_cases/megatron/nemo/kubernetes/finetune_custom_dataset.py
+++ b/3.test_cases/megatron/nemo/kubernetes/finetune_custom_dataset.py
@@ -22,7 +22,7 @@
 # function in the CustomDataModule class to match your dataset's structure
 # =============================================================================
 
-# python finetune.py --max_steps 200 --nodes 1 --gpus L40S --gpu-devices 8 --container_image nvcr.io/nvidia/nemo:24.12 --env_vars_file env_vars.json --pvc_name fsx-claim --pvc_mount_path /mnt/nemo
+# python finetune_custom_dataset.py --max_steps 200 --nodes 1 --gpus L40S --gpu-devices 8 --container_image nvcr.io/nvidia/nemo:26.02 --env_vars_file env_vars.json --pvc_name fsx-claim --pvc_mount_path /mnt/nemo
 
 
 def get_parser():
@@ -32,7 +32,7 @@ def get_parser():
    parser.add_argument("--gpu-devices", type=int, help="Number of GPUs per node", default=8)
    parser.add_argument("--efa-devices", type=int, help="Number of EFA devices per node", default=None)
    parser.add_argument("--max_steps", type=int, help="Maximum number of steps", default=200)
-   parser.add_argument("--container_image", type=str, help="Container image to use", default="nvcr.io/nvidia/nemo:24.12")
+   parser.add_argument("--container_image", type=str, help="Container image to use", default="nvcr.io/nvidia/nemo:26.02")
    parser.add_argument("--env_vars_file", type=str, help="Path to the JSON file with environment variables", default="env_vars.json")
    parser.add_argument("--pvc_name", type=str, help="Name of the Persistent Volume Claim to use", default="fsx-claim")
    parser.add_argument("--pvc_mount_path", type=str, help="Path where the PVC should be mounted in the container", default="/mnt/nemo")
@@ -109,7 +109,7 @@ def skypilot_executor(
    gpus: str = "L40S",
    efa_devices: Optional[int] = None,
    custom_mounts: Optional[dict[str, str]] = None,
-   container_image: str = "nvcr.io/nvidia/nemo:24.12",
+   container_image: str = "nvcr.io/nvidia/nemo:26.02",
    env_vars_file: str = "env_vars.json",
    pvc_name: str = "nemo-runs",
    lora_enabled: bool = False,
diff --git a/3.test_cases/megatron/nemo/kubernetes/finetune_default_dataset.py b/3.test_cases/megatron/nemo/kubernetes/finetune_default_dataset.py
index 2a6e441da..d56c8f93f 100644
--- a/3.test_cases/megatron/nemo/kubernetes/finetune_default_dataset.py
+++ b/3.test_cases/megatron/nemo/kubernetes/finetune_default_dataset.py
@@ -16,7 +16,7 @@
 from nemo.utils import logging
 
 
-# python finetune.py --max_steps 200 --nodes 1 --gpus L40S --gpu-devices 8 --container_image nvcr.io/nvidia/nemo:24.12 --env_vars_file env_vars.json --pvc_name fsx-claim --pvc_mount_path /mnt/nemo
+# python finetune_default_dataset.py --max_steps 200 --nodes 1 --gpus L40S --gpu-devices 8 --container_image nvcr.io/nvidia/nemo:26.02 --env_vars_file env_vars.json --pvc_name fsx-claim --pvc_mount_path /mnt/nemo
 
 
 def get_parser():
@@ -26,7 +26,7 @@ def get_parser():
    parser.add_argument("--gpu-devices", type=int, help="Number of GPUs per node", default=8)
    parser.add_argument("--efa-devices", type=int, help="Number of EFA devices per node", default=None)
    parser.add_argument("--max_steps", type=int, help="Maximum number of steps", default=200)
-   parser.add_argument("--container_image", type=str, help="Container image to use", default="nvcr.io/nvidia/nemo:24.12")
+   parser.add_argument("--container_image", type=str, help="Container image to use", default="nvcr.io/nvidia/nemo:26.02")
    parser.add_argument("--env_vars_file", type=str, help="Path to the JSON file with environment variables", default="env_vars.json")
    parser.add_argument("--pvc_name", type=str, help="Name of the Persistent Volume Claim to use", default="fsx-claim")
    parser.add_argument("--pvc_mount_path", type=str, help="Path where the PVC should be mounted in the container", default="/mnt/nemo")
@@ -76,7 +76,7 @@ def skypilot_executor(
    gpus: str = "L40S",
    efa_devices: Optional[int] = None,
    custom_mounts: Optional[dict[str, str]] = None,
-   container_image: str = "nvcr.io/nvidia/nemo:24.12",
+   container_image: str = "nvcr.io/nvidia/nemo:26.02",
    env_vars_file: str = "env_vars.json",
    pvc_name: str = "nemo-runs",
    lora_enabled: bool = False,
diff --git a/3.test_cases/megatron/nemo/kubernetes/pretrain_custom_dataset.py b/3.test_cases/megatron/nemo/kubernetes/pretrain_custom_dataset.py
index 04f07bc7e..8a8d182fd 100644
--- a/3.test_cases/megatron/nemo/kubernetes/pretrain_custom_dataset.py
+++ b/3.test_cases/megatron/nemo/kubernetes/pretrain_custom_dataset.py
@@ -12,7 +12,7 @@
 from nemo.utils import logging
 from datasets import load_dataset
 
-# python pretrain.py --max_steps 200 --nodes 1 --gpus L40S --gpu-devices 8 --container_image nvcr.io/nvidia/nemo:24.12 --env_vars_file env_vars.json --pvc_name fsx-claim --pvc_mount_path /mnt/nemo
+# python pretrain_custom_dataset.py --max_steps 200 --nodes 1 --gpus L40S --gpu-devices 8 --container_image nvcr.io/nvidia/nemo:26.02 --env_vars_file env_vars.json --pvc_name fsx-claim --pvc_mount_path /mnt/nemo
 
 
 def small_llama_cfg() -> llm.GPTConfig:
@@ -36,7 +36,7 @@ def get_parser():
    parser.add_argument("--gpu-devices", type=int, help="Number of GPUs per node", default=8)
    parser.add_argument("--efa-devices", type=int, help="Number of EFA devices per node", default=None)
    parser.add_argument("--max_steps", type=int, help="Maximum number of steps", default=200)
-   parser.add_argument("--container_image", type=str, help="Container image to use", default="nvcr.io/nvidia/nemo:24.12")
+   parser.add_argument("--container_image", type=str, help="Container image to use", default="nvcr.io/nvidia/nemo:26.02")
    parser.add_argument("--env_vars_file", type=str, help="Path to the JSON file with environment variables", default="env_vars.json")
    parser.add_argument("--pvc_name", type=str, help="Name of the Persistent Volume Claim to use", default="fsx-claim")
    parser.add_argument("--pvc_mount_path", type=str, help="Path where the PVC should be mounted in the container", default="/mnt/nemo")
@@ -54,7 +54,7 @@ def skypilot_executor(
    gpus: str = "L40S",
    efa_devices: Optional[int] = None,
    custom_mounts: Optional[dict[str, str]] = None,
-   container_image: str = "nvcr.io/nvidia/nemo:24.12",
+   container_image: str = "nvcr.io/nvidia/nemo:26.02",
    env_vars_file: str = "env_vars.json",
    pvc_name: str = "nemo-runs"
 ) -> run.SkypilotExecutor:
diff --git a/3.test_cases/megatron/nemo/kubernetes/pretrain_mock_dataset.py b/3.test_cases/megatron/nemo/kubernetes/pretrain_mock_dataset.py
index 9a807a59e..c36345981 100644
--- a/3.test_cases/megatron/nemo/kubernetes/pretrain_mock_dataset.py
+++ b/3.test_cases/megatron/nemo/kubernetes/pretrain_mock_dataset.py
@@ -15,7 +15,7 @@
 from nemo.lightning.run import plugins
 
 
-# python pretrain.py --max_steps 200 --nodes 1 --gpus L40S --gpu-devices 8 --container_image nvcr.io/nvidia/nemo:24.12 --env_vars_file env_vars.json --pvc_name fsx-claim --pvc_mount_path /mnt/nemo
+# python pretrain_mock_dataset.py --max_steps 200 --nodes 1 --gpus L40S --gpu-devices 8 --container_image nvcr.io/nvidia/nemo:26.02 --env_vars_file env_vars.json --pvc_name fsx-claim --pvc_mount_path /mnt/nemo
 
 
 def small_llama_cfg() -> llm.GPTConfig:
@@ -39,7 +39,7 @@ def get_parser():
    parser.add_argument("--gpu-devices", type=int, help="Number of GPUs per node", default=8)
    parser.add_argument("--efa-devices", type=int, help="Number of EFA devices per node", default=None)
    parser.add_argument("--max_steps", type=int, help="Maximum number of steps", default=200)
-   parser.add_argument("--container_image", type=str, help="Container image to use", default="nvcr.io/nvidia/nemo:24.12")
+   parser.add_argument("--container_image", type=str, help="Container image to use", default="nvcr.io/nvidia/nemo:26.02")
    parser.add_argument("--env_vars_file", type=str, help="Path to the JSON file with environment variables", default="env_vars.json")
    parser.add_argument("--pvc_name", type=str, help="Name of the Persistent Volume Claim to use", default="fsx-claim")
    parser.add_argument("--pvc_mount_path", type=str, help="Path where the PVC should be mounted in the container", default="/mnt/nemo")
@@ -54,7 +54,7 @@ def skypilot_executor(
    gpus: str = "L40S",
    efa_devices: Optional[int] = None,
    custom_mounts: Optional[dict[str, str]] = None,
-   container_image: str = "nvcr.io/nvidia/nemo:24.12",
+   container_image: str = "nvcr.io/nvidia/nemo:26.02",
    env_vars_file: str = "env_vars.json",
    pvc_name: str = "nemo-runs",
 ) -> run.SkypilotExecutor:
diff --git a/3.test_cases/megatron/nemo/slurm/README.md b/3.test_cases/megatron/nemo/slurm/README.md
index 1dc51acbf..72abe4205 100644
--- a/3.test_cases/megatron/nemo/slurm/README.md
+++ b/3.test_cases/megatron/nemo/slurm/README.md
@@ -30,8 +30,8 @@ Before running NeMo jobs, build a custom optimized container image for EFA and C
 Build Image:
 
   ```bash
-  docker build --progress=plain -t aws-nemo:25.07 -f ../Dockerfile ..
-  enroot import -o ~/aws-nemo-25-07.sqsh dockerd://aws-nemo:25.07
+  docker build --progress=plain -t aws-nemo:26.02 -f ../Dockerfile ..
+  enroot import -o ~/aws-nemo-26-02.sqsh dockerd://aws-nemo:26.02
   ```
 
 ## 5. Install Dependencies and Prepare NeMo 2.0 Environment
@@ -73,7 +73,7 @@ In NeMo-Run, you can build and configure everything using Python, eliminating th
 In this example, we run the following script to start the LLaMa 8B pretraining job:
 
   ```bash
-  python run.py --container_image ~/aws-nemo-25-07.sqsh --nodes 2 --partition dev --env_vars_file env_vars.json --max_steps 1000
+  python run.py --container_image ~/aws-nemo-26-02.sqsh --nodes 2 --partition dev --env_vars_file env_vars.json --max_steps 1000
   ```
 
 ## 7. References
diff --git a/3.test_cases/megatron/nemo/slurm/env_vars.json b/3.test_cases/megatron/nemo/slurm/env_vars.json
index 3557d0a1e..27c3ce66a 100644
--- a/3.test_cases/megatron/nemo/slurm/env_vars.json
+++ b/3.test_cases/megatron/nemo/slurm/env_vars.json
@@ -2,8 +2,8 @@
     "TORCH_NCCL_AVOID_RECORD_STREAMS": "1",
     "NVTE_DP_AMAX_REDUCE_INTERVAL": "0",
     "NVTE_ASYNC_AMAX_REDUCTION": "1",
-    "NVTE_FUSED_ATTN": "0",
     "FI_EFA_USE_HUGE_PAGE": "0",
     "NCCL_DEBUG": "INFO",
-    "FI_PROVIDER": "efa"
+    "FI_PROVIDER": "efa",
+    "PATH": "/opt/venv/bin:/opt/slurm/bin:/opt/amazon/openmpi/bin:/opt/amazon/efa/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
 }
diff --git a/3.test_cases/megatron/nemo/slurm/run.py b/3.test_cases/megatron/nemo/slurm/run.py
index 754c8f51b..e6160ef9f 100644
--- a/3.test_cases/megatron/nemo/slurm/run.py
+++ b/3.test_cases/megatron/nemo/slurm/run.py
@@ -8,7 +8,7 @@
 from typing import Any, Optional
 from nemo.collections import llm
 from nemo.lightning.run import plugins
-from nemo.collections.nlp.modules.common.tokenizer_utils import get_nmt_tokenizer
+from nemo.collections.common.tokenizers.tokenizer_utils import get_nmt_tokenizer
 from nemo.collections.llm.recipes.callbacks.common import straggler_det_callback
 from nemo.lightning.pytorch.callbacks import PreemptionCallback
 from nemo.lightning.run import plugins
@@ -37,7 +37,7 @@ def get_parser():
    parser.add_argument("--nodes", type=int, help="Number of nodes to run on", default=1)
    parser.add_argument("--max_steps", type=int, help="Maximum number of steps", default=200)
    parser.add_argument("--account", type=str, help="Slurm account to use", default="ubuntu")
-   parser.add_argument("--container_image", type=str, help="Container image to use", default="/fsx/ubuntu/aws-nemo-24-12.sqsh")
+   parser.add_argument("--container_image", type=str, help="Container image to use", default="/fsx/ubuntu/aws-nemo-26-02.sqsh")
    parser.add_argument("--time", type=str, help="Time to run the job", default="01:00:00")
    parser.add_argument("--env_vars_file", type=str, help="Path to the JSON file with environment variables", default="env_vars.json")
    parser.add_argument("--ntasks_per_node", type=int, help="Number of tasks per node", default=8)
@@ -54,7 +54,7 @@ def slurm_executor(
    remote_job_dir: str = "/fsx/ubuntu/aws-nemo",
    time: str = "01:00:00",
    custom_mounts: Optional[list[str]] = None,
-   container_image: str = "/fsx/ubuntu/aws-nemo-24-12.sqsh",
+   container_image: str = "/fsx/ubuntu/aws-nemo-26-02.sqsh",
    env_vars_file: str = "env_vars.json",
    ntasks_per_node: int = 8,
    retries: int = 0,
diff --git a/3.test_cases/megatron/nemo/slurm/venv.sh b/3.test_cases/megatron/nemo/slurm/venv.sh
index 9e0972a7e..78ea5e5c6 100644
--- a/3.test_cases/megatron/nemo/slurm/venv.sh
+++ b/3.test_cases/megatron/nemo/slurm/venv.sh
@@ -3,29 +3,32 @@
 # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 # SPDX-License-Identifier: MIT-0
 
-set -e
+# Host-side venv for NeMo-Run. The actual training runs inside the
+# `nvcr.io/nvidia/nemo:26.02` container (built by ../Dockerfile); this venv
+# only needs to satisfy NeMo-Run's import-time deps on the head node.
 
-# Install NeMo-Run
-pip install git+https://github.com/NVIDIA/NeMo-Run.git@4d056535b5cce475b0536243e2cefcfa3897eee8
+set -e
 
-# # Install PyTorch
-pip install torch==2.6.0
- 
-# Install Megatron-LM
-pip install --no-deps git+https://github.com/NVIDIA/Megatron-LM.git@b5d90de8e7c7fae5f35be89d665f237970540bed
+# Pin to NeMo-Run v0.9.0 (Apr 2026 tag) instead of an arbitrary commit, so
+# `bash venv.sh` produces the same environment across runs.
+pip install "nemo-run==0.9.0"
 
-# # Download and install Mamba SSM
-wget https://github.com/state-spaces/mamba/releases/download/v2.2.2/mamba_ssm-2.2.2+cu118torch2.0cxx11abiFALSE-cp310-cp310-linux_x86_64.whl  # Adjusted for torch 2.0
-pip install mamba_ssm-2.2.2+cu118torch2.0cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
-rm mamba_ssm-2.2.2+cu118torch2.0cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
+# Torch is a NeMo-Run import-time dep on the host; CUDA flavor doesn't matter
+# here because all GPU work happens inside the container.
+pip install "torch==2.10.0"
 
-# Install NeMo Toolkit
-pip install nemo_toolkit['all']==2.1.0
+# Megatron-LM pinned to the version that NeMo 2.7.x is API-compatible with
+# (must match MEGATRON_CORE_VERSION in ../Dockerfile). nemo:26.02 ships
+# 0.16.1 alongside NeMo 2.7.1 by mistake — 0.16.x removed kwargs and
+# submodules NeMo 2.7.x still references (see Dockerfile comment for
+# details). 0.15.3 is the latest 0.15.x patch.
+pip install --no-deps "git+https://github.com/NVIDIA/Megatron-LM.git@core_v0.15.3"
 
-# Install OpenCC
-pip install opencc==1.1.6
+# NeMo Toolkit. PERFORMANCE.md (in this directory's parent) lists 2.5+ as
+# recommended on the NeMo 26.02 container. 2.7.3 is the latest patch in 2.x.
+pip install "nemo_toolkit[all]==2.7.3"
 
-# Clone and install NVIDIA Resiliency Extension
-pip install nvidia-resiliency-ext="v0.2.1"
+# NVIDIA Resiliency Extension for fault-tolerance plugins used in run.py.
+pip install "nvidia-resiliency-ext==0.4.1"
 
 echo "Environment setup complete."