From a37e5f504cebd7457ee512c022e541974d2d7629 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9D=BE=E7=BF=8A?=
Date: Wed, 2 Jul 2025 17:16:49 +0800
Subject: [PATCH 1/3] =?UTF-8?q?=E2=9C=A8=20feat(Ray):=20Enhance=20Ray?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add task_runner.py to support specifying resources, py_modules, and pip.

https://github.com/isaac-sim/IsaacLab/issues/2632
---
 CONTRIBUTORS.md                               |   1 +
 docs/source/features/ray.rst                  |  19 +-
 .../reinforcement_learning/ray/submit_job.py  |   9 +-
 .../reinforcement_learning/ray/task_runner.py | 176 ++++++++++++++++++
 4 files changed, 202 insertions(+), 3 deletions(-)
 create mode 100644 scripts/reinforcement_learning/ray/task_runner.py

diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index fe4b06dd263..11596e4a669 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -125,6 +125,7 @@ Guidelines for modifications:
 * Ziqi Fan
 * Zoe McCarthy
 * David Leon
+* Song Yi
 
 ## Acknowledgements
 
diff --git a/docs/source/features/ray.rst b/docs/source/features/ray.rst
index 1f18a804ed0..a8b61cd0c31 100644
--- a/docs/source/features/ray.rst
+++ b/docs/source/features/ray.rst
@@ -46,7 +46,7 @@ specifying the ``--num_workers`` argument for resource-wrapped jobs, or ``--num_
 for tuning jobs, which is especially critical for parallel aggregate job processing on local/virtual
 multi-GPU machines. Tuning jobs assume homogeneous node resource composition for nodes with GPUs.
 
-The two following files contain the core functionality of the Ray integration.
+The three following files contain the core functionality of the Ray integration.
 
 .. dropdown:: scripts/reinforcement_learning/ray/wrap_resources.py
    :icon: code
@@ -62,6 +62,12 @@ The two following files contain the core functionality of the Ray integration.
       :language: python
       :emphasize-lines: 18-53
 
+.. dropdown:: scripts/reinforcement_learning/ray/task_runner.py
+   :icon: code
+
+   .. literalinclude:: ../../../scripts/reinforcement_learning/ray/task_runner.py
+      :language: python
+      :emphasize-lines: 9-55
 
 The following script can be used to submit aggregate
 jobs to one or more Ray cluster(s), which can be used for
@@ -73,7 +79,7 @@ resource requirements.
 
    .. literalinclude:: ../../../scripts/reinforcement_learning/ray/submit_job.py
       :language: python
-      :emphasize-lines: 12-53
+      :emphasize-lines: 13-59
 
 The following script can be used to extract KubeRay cluster information for aggregate job submission.
 
@@ -151,6 +157,15 @@ Submitting resource-wrapped individual jobs instead of automatic tuning runs is
       :language: python
       :emphasize-lines: 14-66
 
+The following script supports specifying per-task resources and setting ``py_modules`` and ``pip`` packages for each run.
+
+.. dropdown:: scripts/reinforcement_learning/ray/task_runner.py
+   :icon: code
+
+   .. literalinclude:: ../../../scripts/reinforcement_learning/ray/task_runner.py
+      :language: python
+      :emphasize-lines: 9-55
+
 Transferring files from the running container can be done as follows.
 
 .. code-block:: bash
 
diff --git a/scripts/reinforcement_learning/ray/submit_job.py b/scripts/reinforcement_learning/ray/submit_job.py
index 27c00eda71f..8d0649ebf8a 100644
--- a/scripts/reinforcement_learning/ray/submit_job.py
+++ b/scripts/reinforcement_learning/ray/submit_job.py
@@ -26,7 +26,11 @@
 creates several individual jobs when started on a cluster.
 
 Alternatively, an aggregate job could be a :file:'../wrap_resources.py`
 resource-wrapped job, which may contain several individual sub-jobs separated by
-the + delimiter.
+the + delimiter. An aggregate job could also be a :file:`../task_runner.py` multi-task submission job,
+where each sub-job and its resource requirements are defined in a YAML configuration file.
+In this mode, :file:`../task_runner.py` will read the YAML file (via --task_cfg) and
+submit all defined sub-tasks to the Ray cluster, supporting per-job resource specification and
+real-time streaming of sub-job outputs.
 
 If there are more aggregate jobs than cluster(s), aggregate jobs will be
 submitted as clusters become available via the defined relation above.
 If there are less aggregate job(s)
@@ -48,6 +52,9 @@
     # Example: Submitting resource wrapped job
     python3 scripts/reinforcement_learning/ray/submit_job.py --aggregate_jobs wrap_resources.py --test
 
+    # Example: submitting tasks with per-task resources, pip packages, and py_modules
+    python3 scripts/reinforcement_learning/ray/submit_job.py --aggregate_jobs task_runner.py --task_cfg tasks.yaml
+
     # For all command line arguments
     python3 scripts/reinforcement_learning/ray/submit_job.py -h
 """

diff --git a/scripts/reinforcement_learning/ray/task_runner.py b/scripts/reinforcement_learning/ray/task_runner.py
new file mode 100644
index 00000000000..044b0ab9147
--- /dev/null
+++ b/scripts/reinforcement_learning/ray/task_runner.py
@@ -0,0 +1,176 @@
+import yaml
+import ray
+import sys
+import argparse
+import subprocess
+import threading
+from enum import Enum
+
+"""
+This script dispatches one or more user-defined Python tasks to workers in a Ray cluster.
+Each task, with its resource requirements and execution parameters, is described in a YAML configuration file.
+You may specify the desired number of CPUs, GPUs, and memory allocation for each task in the config file.
+
+Key features:
+- Flexible resource management per task via config fields (`num_gpus`, `num_cpus`, `memory`).
+- Real-time output streaming (stdout/stderr) for each task.
+- Parallel execution of multiple tasks across cluster resources.
+
+Tasks are distributed and scheduled according to Ray’s built-in resource manager.
+
+Typical usage:
+---------------
+
+.. code-block:: bash
+
+    # Print help and argument details:
+    python task_runner.py -h
+
+    # Submit tasks defined in a YAML file to the Ray cluster (auto-detects Ray head address):
+    python task_runner.py --task_cfg /path/to/tasks.yaml
+
+YAML configuration example:
+---------------------------
+
+.. code-block:: yaml
+
+    pip: ["xxx"]
+    py_modules: ["my_package/my_package"]
+    tasks:
+      - name: "task1"
+        py_args: "-m torch.distributed.run --nnodes=1 ..."
+        num_gpus: 2
+        num_cpus: 10
+        memory: 10737418240
+      - name: "task2"
+        py_args: "script.py --option arg"
+        num_gpus: 0
+        num_cpus: 1
+        memory: "10*1024*1024*1024"
+
+- `name`: Human-readable identifier for the task, used in log messages.
+- `pip`: List of pip packages to install in the runtime environment.
+- `py_modules`: List of local Python modules to upload to the cluster.
+- `py_args`: Arguments passed to the Python executable for this task.
+- `num_gpus`, `num_cpus`: Number of GPUs/CPUs to allocate. Can be an integer or a string like `"2*2"`.
+- `memory`: Amount of memory (bytes) to allocate. Can be an integer or a string like `"10*1024*1024*1024"`.
+
+To stop all tasks early, press Ctrl+C; the script will cancel all running Ray tasks.
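+
+The same YAML file can also be dispatched through :file:`submit_job.py` as an aggregate job:
+
+.. code-block:: bash
+
+    python3 scripts/reinforcement_learning/ray/submit_job.py --aggregate_jobs task_runner.py --task_cfg tasks.yaml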
+""" + +class OutputType(str, Enum): + STDOUT = "stdout" + STDERR = "stderr" + +def parse_args(): + parser = argparse.ArgumentParser(description="Run tasks from a YAML config file.") + parser.add_argument("--task_cfg", type=str, required=True, help="Path to the YAML task file.") + parser.add_argument("--ray_address", type=str, default="auto", help="the Ray address.") + return parser.parse_args() + +@ray.remote +def task_wrapper(task): + task_name = task["name"] + task_py_args = task["py_args"] + + # build command + cmd = [sys.executable, *task_py_args.split()] + print(f"[INFO]: {task_name} run: {' '.join(cmd)}") + def handle_stream(stream, output_type): + for line in iter(stream.readline, ''): + stripped_line = line.rstrip('\n') + if output_type == OutputType.STDOUT: + print(stripped_line) + elif output_type == OutputType.STDERR: + print(stripped_line, file=sys.stderr) + stream.close() + try: + process = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + bufsize=1 # None for best performance and 1 for realtime output + ) + + # start tow threads to read stdout and stderr + stdout_thread = threading.Thread( + target=handle_stream, args=(process.stdout, OutputType.STDOUT) + ) + stderr_thread = threading.Thread( + target=handle_stream, args=(process.stderr, OutputType.STDERR) + ) + stdout_thread.start() + stderr_thread.start() + # wait for process to finish + process.wait() + # wait for threads to finish + stdout_thread.join() + stderr_thread.join() + + returncode = process.returncode + except Exception as e: + print(f"[ERROR]: error while running task {task_name}: {str(e)}" ) + raise e + + print(f"[INFO]: task {task_name} finished with return code {returncode}") + return True + + +def submit_tasks(ray_address,pip,py_modules,tasks): + if not tasks: + print("[WARNING]: no tasks to submit") + return + + if not ray.is_initialized(): + try: + ray.init(address=ray_address, log_to_driver=True, runtime_env={ + "pip": pip, + "py_modules": py_modules, + }) + except Exception as e: + raise RuntimeError(f"initialize ray failed: {str(e)}") + task_results = [] + for task in tasks: + num_gpus = eval(task["num_gpus"]) if isinstance(task["num_gpus"], str) else task["num_gpus"] + num_cpus = eval(task["num_cpus"]) if isinstance(task["num_cpus"], str) else task["num_cpus"] + memory = eval(task["memory"]) if isinstance(task["memory"], str) else task["memory"] + print(f"[INFO]: submitting task {task['name']} with num_gpus={num_gpus}, num_cpus={num_cpus}, memory={memory}") + task_results.append(task_wrapper.options( + num_gpus=num_gpus, + num_cpus=num_cpus, + memory=memory, + ).remote(task)) + + try: + results = ray.get(task_results) + for i, _ in enumerate(results): + print(f"[INFO]: Task {tasks[i]['name']} finished") + print("[INFO]: all tasks completed.") + except KeyboardInterrupt: + print("[INFO]: dealing with keyboard interrupt") + for future in task_results: + ray.cancel(future,force=True) + print("[INFO]: all tasks cancelled.") + sys.exit(1) + except Exception as e: + print(f"[ERROR]: error while running tasks: {str(e)}") + raise e + + +def main(): + args = parse_args() + try: + with open(args.task_cfg, 'r') as f: + config = yaml.safe_load(f) + except Exception as e: + raise SystemExit(f"error while loading task config: {str(e)}") + tasks = config["tasks"] + py_modules = config.get("py_modules",None) + pip = config.get("pip",None) + submit_tasks( + ray_address=args.ray_address, + pip=pip, + py_modules=py_modules, + tasks=tasks, + ) + +if __name__ == "__main__": + 
+    main()
+

From 46ca9430ec5b2cdcf5c3888c97318cc09e7f05b6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9D=BE=E7=BF=8A?=
Date: Tue, 8 Jul 2025 17:59:33 +0800
Subject: [PATCH 2/3] refactor(Ray): Use execute_job and improve code structure

---
 .../reinforcement_learning/ray/task_runner.py | 153 ++++++++----------
 1 file changed, 68 insertions(+), 85 deletions(-)

diff --git a/scripts/reinforcement_learning/ray/task_runner.py b/scripts/reinforcement_learning/ray/task_runner.py
index 044b0ab9147..9bbfda2791f 100644
--- a/scripts/reinforcement_learning/ray/task_runner.py
+++ b/scripts/reinforcement_learning/ray/task_runner.py
@@ -1,10 +1,14 @@
+# Copyright (c) 2022-2025, The Isaac Lab Project Developers (https://github.com/isaac-sim/IsaacLab/blob/main/CONTRIBUTORS.md).
+# All rights reserved.
+#
+# SPDX-License-Identifier: BSD-3-Clause
+
+import argparse
+import sys
 import yaml
+
 import ray
-import sys
-import argparse
-import subprocess
-import threading
-from enum import Enum
+import util
 
 """
 This script dispatches one or more user-defined Python tasks to workers in a Ray cluster.
@@ -36,7 +40,7 @@
     py_modules: ["my_package/my_package"]
     tasks:
       - name: "task1"
-        py_args: "-m torch.distributed.run --nnodes=1 ..."
+        py_args: "-m torch.distributed.run --nnodes=1 --nproc_per_node=2 --rdzv_endpoint=localhost:29501 /workspace/isaaclab/scripts/reinforcement_learning/rsl_rl/train.py --task=Isaac-Cartpole-v0 --max_iterations 200 --headless --distributed"
         num_gpus: 2
         num_cpus: 10
        memory: 10737418240
@@ -54,99 +58,77 @@ To stop all tasks early, press Ctrl+C; the script will cancel all running Ray tasks.
 """
 
-class OutputType(str, Enum):
-    STDOUT = "stdout"
-    STDERR = "stderr"
+
 
 def parse_args():
     parser = argparse.ArgumentParser(description="Run tasks from a YAML config file.")
     parser.add_argument("--task_cfg", type=str, required=True, help="Path to the YAML task file.")
     parser.add_argument("--ray_address", type=str, default="auto", help="The Ray address.")
+    parser.add_argument(
+        "--test",
+        action="store_true",
+        help=(
+            "Run nvidia-smi test instead of the arbitrary job; "
+            "can be used as a sanity check prior to any jobs to verify "
+            "that GPU resources are correctly isolated."
+        ),
+    )
     return parser.parse_args()
 
-@ray.remote
-def task_wrapper(task):
-    task_name = task["name"]
-    task_py_args = task["py_args"]
-
-    # build command
-    cmd = [sys.executable, *task_py_args.split()]
-    print(f"[INFO]: {task_name} run: {' '.join(cmd)}")
-    def handle_stream(stream, output_type):
-        for line in iter(stream.readline, ''):
-            stripped_line = line.rstrip('\n')
-            if output_type == OutputType.STDOUT:
-                print(stripped_line)
-            elif output_type == OutputType.STDERR:
-                print(stripped_line, file=sys.stderr)
-        stream.close()
-    try:
-        process = subprocess.Popen(
-            cmd,
-            stdout=subprocess.PIPE,
-            stderr=subprocess.PIPE,
-            text=True,
-            bufsize=1  # None for best performance, 1 for real-time output
-        )
-
-        # start two threads to read stdout and stderr
-        stdout_thread = threading.Thread(
-            target=handle_stream, args=(process.stdout, OutputType.STDOUT)
-        )
-        stderr_thread = threading.Thread(
-            target=handle_stream, args=(process.stderr, OutputType.STDERR)
-        )
-        stdout_thread.start()
-        stderr_thread.start()
-        # wait for process to finish
-        process.wait()
-        # wait for threads to finish
-        stdout_thread.join()
-        stderr_thread.join()
-
-        returncode = process.returncode
-    except Exception as e:
-        print(f"[ERROR]: error while running task {task_name}: {str(e)}")
-        raise e
-
-    print(f"[INFO]: task {task_name} finished with return code {returncode}")
-    return True
+
+def parse_task_opt(task):
+    # resource fields may be integers or arithmetic strings such as "2*2" or
+    # "10*1024*1024*1024"; eval() resolves them, so the config file must be trusted
+    opts = {}
+    if "num_gpus" in task:
+        opts["num_gpus"] = eval(task["num_gpus"]) if isinstance(task["num_gpus"], str) else task["num_gpus"]
+    if "num_cpus" in task:
+        opts["num_cpus"] = eval(task["num_cpus"]) if isinstance(task["num_cpus"], str) else task["num_cpus"]
+    if "memory" in task:
+        opts["memory"] = eval(task["memory"]) if isinstance(task["memory"], str) else task["memory"]
+    return opts
+
+
+@ray.remote
+def remote_execute_job(job_cmd: str, identifier_string: str, test_mode: bool) -> str | dict:
+    return util.execute_job(
+        job_cmd=job_cmd,
+        identifier_string=identifier_string,
+        test_mode=test_mode,
+        log_all_output=True,  # log_all_output=True streams job output in real time
+    )
 
-def submit_tasks(ray_address,pip,py_modules,tasks):
+def run_tasks(ray_address, pip, py_modules, tasks, test_mode=False):
     if not tasks:
         print("[WARNING]: no tasks to submit")
         return
 
     if not ray.is_initialized():
         try:
-            ray.init(address=ray_address, log_to_driver=True, runtime_env={
-                "pip": pip,
-                "py_modules": py_modules,
-            })
+            ray.init(
+                address=ray_address,
+                log_to_driver=True,
+                runtime_env={
+                    "pip": pip,
+                    "py_modules": py_modules,
+                },
+            )
         except Exception as e:
             raise RuntimeError(f"failed to initialize Ray: {str(e)}")
     task_results = []
-    for task in tasks:
-        num_gpus = eval(task["num_gpus"]) if isinstance(task["num_gpus"], str) else task["num_gpus"]
-        num_cpus = eval(task["num_cpus"]) if isinstance(task["num_cpus"], str) else task["num_cpus"]
-        memory = eval(task["memory"]) if isinstance(task["memory"], str) else task["memory"]
-        print(f"[INFO]: submitting task {task['name']} with num_gpus={num_gpus}, num_cpus={num_cpus}, memory={memory}")
-        task_results.append(task_wrapper.options(
-            num_gpus=num_gpus,
-            num_cpus=num_cpus,
-            memory=memory,
-        ).remote(task))
-
+    for task in tasks:
+        opts = parse_task_opt(task)
+        task_cmd = " ".join([sys.executable, *task["py_args"].split()])
+        print(f"[INFO]: submitting task {task['name']} with opts={opts}: {task_cmd}")
+        task_results.append(remote_execute_job.options(**opts).remote(task_cmd, task["name"], test_mode))
+
     try:
         results = ray.get(task_results)
-        for i, _ in enumerate(results):
-            print(f"[INFO]: Task {tasks[i]['name']} finished")
+        for i, result in enumerate(results):
+            print(f"[INFO]: Task {tasks[i]['name']} result: \n{result}")
         print("[INFO]: all tasks completed.")
     except KeyboardInterrupt:
         print("[INFO]: keyboard interrupt received, cancelling all tasks")
         for future in task_results:
-            ray.cancel(future,force=True)
+            ray.cancel(future, force=True)
         print("[INFO]: all tasks cancelled.")
         sys.exit(1)
     except Exception as e:
@@ -157,20 +139,21 @@ def submit_tasks(ray_address,pip,py_modules,tasks):
 def main():
     args = parse_args()
     try:
-        with open(args.task_cfg, 'r') as f:
+        with open(args.task_cfg) as f:
             config = yaml.safe_load(f)
     except Exception as e:
         raise SystemExit(f"error while loading task config: {str(e)}")
     tasks = config["tasks"]
-    py_modules = config.get("py_modules",None)
-    pip = config.get("pip",None)
-    submit_tasks(
-        ray_address=args.ray_address,
-        pip=pip,
-        py_modules=py_modules,
-        tasks=tasks,
-    )
+    py_modules = config.get("py_modules")
+    pip = config.get("pip")
+    run_tasks(
+        ray_address=args.ray_address,
+        pip=pip,
+        py_modules=py_modules,
+        tasks=tasks,
+        test_mode=args.test,
+    )
+
 
 if __name__ == "__main__":
     main()
-

From 3a7a349c0527cb024cd9f9e56ef7a6ce8fb82d04 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E6=9D=BE=E7=BF=8A?=
Date: Tue, 8 Jul 2025 18:01:06 +0800
Subject: [PATCH 3/3] docs(Ray): Update documentation about task_runner.py

---
 docs/source/features/ray.rst                     | 4 ++--
 scripts/reinforcement_learning/ray/submit_job.py | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/docs/source/features/ray.rst b/docs/source/features/ray.rst
index a8b61cd0c31..f5d73e7a910 100644
--- a/docs/source/features/ray.rst
+++ b/docs/source/features/ray.rst
@@ -67,7 +67,7 @@ The three following files contain the core functionality of the Ray integration.
 
    .. literalinclude:: ../../../scripts/reinforcement_learning/ray/task_runner.py
       :language: python
-      :emphasize-lines: 9-55
+      :emphasize-lines: 13-59
 
 The following script can be used to submit aggregate
 jobs to one or more Ray cluster(s), which can be used for
@@ -79,7 +79,7 @@ resource requirements.
 
    .. literalinclude:: ../../../scripts/reinforcement_learning/ray/submit_job.py
       :language: python
-      :emphasize-lines: 13-59
+      :emphasize-lines: 13-61
 
 The following script can be used to extract KubeRay cluster information for aggregate job submission.
 
diff --git a/scripts/reinforcement_learning/ray/submit_job.py b/scripts/reinforcement_learning/ray/submit_job.py
index 8d0649ebf8a..84441eb7638 100644
--- a/scripts/reinforcement_learning/ray/submit_job.py
+++ b/scripts/reinforcement_learning/ray/submit_job.py
@@ -53,6 +53,7 @@
     python3 scripts/reinforcement_learning/ray/submit_job.py --aggregate_jobs wrap_resources.py --test
 
     # Example: submitting tasks with per-task resources, pip packages, and py_modules
+    # You may use relative paths for task_cfg and py_modules; place them in the scripts/reinforcement_learning/ray directory, which will be uploaded to the cluster.
     python3 scripts/reinforcement_learning/ray/submit_job.py --aggregate_jobs task_runner.py --task_cfg tasks.yaml
 
     # For all command line arguments
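
For reference, a complete ``tasks.yaml`` matching the schema documented in ``task_runner.py``
after these patches might look like the following sketch; the task name and the ``tensorboard``
pip package are illustrative placeholders rather than values taken from the patches:

.. code-block:: yaml

    pip: ["tensorboard"]
    py_modules: ["my_package/my_package"]
    tasks:
      - name: "cartpole_distributed"
        py_args: "-m torch.distributed.run --nnodes=1 --nproc_per_node=2 --rdzv_endpoint=localhost:29501 /workspace/isaaclab/scripts/reinforcement_learning/rsl_rl/train.py --task=Isaac-Cartpole-v0 --max_iterations 200 --headless --distributed"
        num_gpus: 2
        num_cpus: 10
        memory: "10*1024*1024*1024"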