Skip to content

Commit 1e7aa2f

Browse files
author
pytorchbot
committed
2025-11-13 nightly release (6b65eb4)
1 parent ce30d03 commit 1e7aa2f

File tree

3 files changed

+4
-32
lines changed

3 files changed

+4
-32
lines changed

.meta/mast/env_setup.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
# setup_forge_env.sh - Setup conda environment and install forge with mounting
1010

1111
# Configuration
12-
CONDA_ENV_NAME="forge:41468b33a03eaf2bf5b44517f418028a"
12+
CONDA_ENV_NAME="forge:314c3548ae691f4aa2e49f1b1fad06b3"
1313

1414
# Colors for output
1515
RED='\033[0;31m'

apps/sft/main.py

Lines changed: 0 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -81,34 +81,8 @@ def __init__(self, config: DictConfig):
8181
self.gradient_accumulation_steps = 1 # Example value, adjust as needed
8282
self._rank = current_rank().rank
8383
self._size = math.prod(current_size().values())
84-
self._init_dist()
8584
super().__init__(job_config)
8685

87-
def _init_dist(self):
88-
"""Initializes torch distributed.
89-
90-
torchrun normally handles this, but we need to do it ourselves
91-
in monarch for now.
92-
93-
We should consider putting this into ForgeActor, but having this
94-
be explicit for now.
95-
96-
"""
97-
env = {
98-
"RANK": str(self._rank),
99-
"LOCAL_RANK": str(self._rank),
100-
"LOCAL_WORLD_SIZE": str(self._size),
101-
"GROUP_RANK": str(self._size),
102-
"GROUP_WORLD_SIZE": str(self._size),
103-
"ROLE_RANK": str(self._rank),
104-
"ROLE_WORLD_SIZE": str(self._size),
105-
"ROLE_NAME": "rank",
106-
"WORLD_SIZE": str(self._size),
107-
"PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True",
108-
}
109-
os.environ.update(env)
110-
logger.info("env: {}".format(env))
111-
11286
async def setup_metric_logger(self):
11387
"""Initialization happens in the main process. Here we just retrieve it"""
11488
mlogger = await get_or_create_metric_logger()

src/forge/controller/launcher.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,20 +15,18 @@
1515
from typing import Any
1616

1717
import monarch
18-
1918
import torchx.specs as specs
20-
21-
from forge.types import Launcher, LauncherConfig
2219
from monarch._rust_bindings.monarch_hyperactor.alloc import AllocConstraints
2320
from monarch._rust_bindings.monarch_hyperactor.channel import ChannelTransport
24-
2521
from monarch._rust_bindings.monarch_hyperactor.config import configure
2622
from monarch._src.actor.allocator import RemoteAllocator, TorchXRemoteAllocInitializer
2723
from monarch.actor import Actor, endpoint, ProcMesh
2824
from monarch.tools import commands
2925
from monarch.tools.commands import create, info
3026
from monarch.tools.config import Config, Workspace
3127

28+
from forge.types import Launcher, LauncherConfig
29+
3230
_MAST_AVAILABLE = False
3331

3432
try:
@@ -267,7 +265,7 @@ async def launch_mast_job(self):
267265

268266
def add_additional_packages(self, packages: "Packages") -> "Packages":
269267
packages.add_package("oil.oilfs:stable")
270-
packages.add_package("manifold.manifoldfs")
268+
packages.add_package("manifold.manifoldfs:prod")
271269
return packages
272270

273271
def build_appdef(self) -> specs.AppDef:

0 commit comments

Comments
 (0)