
Commit

Cleanup
michaelbenayoun committed Feb 9, 2024
1 parent c19d511 commit 69f144b
Showing 2 changed files with 1 addition and 44 deletions.
44 changes: 1 addition & 43 deletions optimum/neuron/trainers.py
@@ -23,8 +23,6 @@
 import sys
 import time
 import warnings
-from pathlib import Path
-from tempfile import TemporaryDirectory
 from typing import Any, Dict, List, Optional, Tuple, Union

 import numpy as np
@@ -116,38 +114,11 @@
 if KEEP_HF_HUB_PROGRESS_BARS is None:
     os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1"

-# Used for torch.distributed.
-_ORIGINAL_NEURON_CACHE_PATH: Optional[Path] = None
-_TMP_NEURON_CACHE_DIR: Optional[TemporaryDirectory] = None
-_TMP_NEURON_CACHE_PATH: Optional[Path] = None
-_TCP_STORE_ADDRESS = "127.0.0.1"
-_TCP_STORE_PORT = 5000


 if os.environ.get("TORCHELASTIC_RUN_ID"):
     import torch_xla.distributed.xla_backend as xbn

     if not isinstance(torch.distributed.group.WORLD, xbn.ProcessGroupXla):
-        # _ORIGINAL_NEURON_CACHE_PATH = get_neuron_cache_path()
-
-        # # _ORIGINAL_NEURON_CACHE_PATH is `None` when the `--no-cache` flag is set.
-        # if _ORIGINAL_NEURON_CACHE_PATH is not None:
-        #     if is_precompilation():
-        #         # During precompilation, we make sure to set the cache path to the compile cache path defined
-        #         # by the user. If nothing is specified, it is set to the default compile cache used by the
-        #         # Neuron compiler: /var/tmp/neuron-compile-cache
-        #         set_neuron_cache_path(_ORIGINAL_NEURON_CACHE_PATH)
-        #     else:
-        #         if os.environ["RANK"] == "0":
-        #             _TMP_NEURON_CACHE_DIR = NeuronCacheCallback.create_temporary_neuron_cache(get_neuron_cache_path())
-        #             store = torch.distributed.TCPStore(_TCP_STORE_ADDRESS, _TCP_STORE_PORT, is_master=True)
-        #             store.set("tmp_neuron_cache_path", _TMP_NEURON_CACHE_DIR.name)
-        #             _TMP_NEURON_CACHE_PATH = Path(_TMP_NEURON_CACHE_DIR.name)
-        #         else:
-        #             store = torch.distributed.TCPStore(_TCP_STORE_ADDRESS, _TCP_STORE_PORT, is_master=False)
-        #             _TMP_NEURON_CACHE_PATH = Path(store.get("tmp_neuron_cache_path").decode("utf-8"))
-        #         set_neuron_cache_path(_TMP_NEURON_CACHE_PATH)
-
         torch.distributed.init_process_group(backend="xla")
         if not isinstance(torch.distributed.group.WORLD, xbn.ProcessGroupXla):
             raise AssertionError("Failed to initialize torch.distributed process group using XLA backend.")
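The commented-out block deleted above implemented a small rendezvous: rank 0 created a temporary Neuron cache directory and published its path through a torch.distributed.TCPStore so every other rank could read it back. A minimal standalone sketch of that pattern, assuming torchrun-style RANK/WORLD_SIZE environment variables; the address, port, and key name are illustrative only:

# Sketch of the TCPStore rendezvous the deleted block relied on.
# RANK/WORLD_SIZE are assumed to be set by the launcher (e.g. torchrun);
# the address, port, and key name are illustrative only.
import os
from pathlib import Path
from tempfile import TemporaryDirectory

import torch.distributed as dist

rank = int(os.environ["RANK"])
world_size = int(os.environ["WORLD_SIZE"])

if rank == 0:
    # Rank 0 owns both the store and the temporary cache directory.
    tmp_dir = TemporaryDirectory()
    store = dist.TCPStore("127.0.0.1", 5000, world_size, is_master=True)
    store.set("tmp_neuron_cache_path", tmp_dir.name)
    cache_path = Path(tmp_dir.name)
else:
    # Other ranks connect as clients and block until the key is published.
    store = dist.TCPStore("127.0.0.1", 5000, world_size, is_master=False)
    cache_path = Path(store.get("tmp_neuron_cache_path").decode("utf-8"))

print(f"rank {rank} uses Neuron cache at {cache_path}")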
@@ -197,19 +168,6 @@ def __init__(self, *args, **kwargs):
         if self.args.local_rank <= 0:
             logger.setLevel(logging.INFO)

-        push = self.args.local_rank <= 0 and not is_precompilation() and not self.args.skip_cache_push
-        fetch = self.args.local_rank <= 0 or self.args.mp_plugin.should_parallelize
-
-        # callback = NeuronCacheCallback(
-        #     tmp_neuron_cache=_TMP_NEURON_CACHE_PATH,
-        #     original_neuron_cache_path=_ORIGINAL_NEURON_CACHE_PATH,
-        #     fetch=fetch,
-        #     push=push,
-        #     wait_for_everyone_on_fetch=True,
-        #     wait_for_everyone_on_push=True,
-        # )
-        # self.add_callback(callback)
-
         # Make the model Neuron-compatible for generation.
         patch_generation_mixin_to_neuron_generation_mixin(self.model)

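The deleted lines computed per-rank push/fetch conditions and would have registered a NeuronCacheCallback on the trainer. For reference, registration follows the standard transformers TrainerCallback API; LoggingCallback below is a hypothetical example, not part of the library:

# Generic sketch of Trainer callback registration; LoggingCallback is hypothetical,
# add_callback is the standard transformers Trainer API.
from transformers import TrainerCallback


class LoggingCallback(TrainerCallback):
    def on_train_begin(self, args, state, control, **kwargs):
        # Runs once, right before the training loop starts.
        print("training is starting")


# On an instantiated trainer:
# trainer.add_callback(LoggingCallback())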
@@ -1321,7 +1279,7 @@ def evaluation_loop(
     def train(
         self,
         resume_from_checkpoint: Optional[Union[str, bool]] = None,
-        trial: Union["optuna.Trial", Dict[str, Any]] = None,
+        trial=None,  # No type-annotation for this one because it is related to the optuna package.
         ignore_keys_for_eval: Optional[List[str]] = None,
         **kwargs,
     ):
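Dropping the annotation keeps the signature free of any reference to optuna. If a type hint were still wanted without touching optuna at runtime, one common idiom (not what this commit does) is a typing.TYPE_CHECKING guard, sketched here:

# Sketch of the TYPE_CHECKING idiom as an alternative to an untyped parameter.
# This is an illustration only, not the approach the commit takes.
from typing import TYPE_CHECKING, Any, Dict, Optional, Union

if TYPE_CHECKING:
    import optuna  # seen by type checkers only, never imported at runtime


class Trainer:
    def train(
        self,
        resume_from_checkpoint: Optional[Union[str, bool]] = None,
        trial: Optional[Union["optuna.Trial", Dict[str, Any]]] = None,
    ) -> None:
        ...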
1 change: 0 additions & 1 deletion optimum/neuron/utils/hub_neuronx_cache.py
@@ -298,7 +298,6 @@ def patch(restore: bool = False):
     main_dir = Path(path.split(":")[0])
     exists = "neuron_cc_wrapper" in os.listdir(main_dir)
     if exists and not restore:
-        print("exist")
         src = main_dir / "neuron_cc_wrapper"
         dst = main_dir / "neuron_cc_wrapper_backup"
         shutil.move(src, dst)
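For context, patch() moves the neuron_cc_wrapper script to a backup name so the change can be undone on restore; the deleted line was a leftover debug print. A self-contained sketch of that move-and-restore pattern, with an illustrative function name and directory layout:

# Sketch of the reversible file-swap pattern used by patch()/restore.
# swap_wrapper and the directory layout here are illustrative.
import shutil
from pathlib import Path


def swap_wrapper(main_dir: Path, restore: bool = False) -> None:
    wrapper = main_dir / "neuron_cc_wrapper"
    backup = main_dir / "neuron_cc_wrapper_backup"
    if not restore and wrapper.exists():
        # Patch: move the real wrapper aside so a replacement can be installed.
        shutil.move(str(wrapper), str(backup))
    elif restore and backup.exists():
        # Restore: put the original wrapper back in place.
        shutil.move(str(backup), str(wrapper))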
