Changes from all commits · 30 commits
3527e6e
Adapted megatronlm server implementation for interacting with lm eval…
KlaudiaTH Aug 3, 2023
62c0def
Merge branch 'main' into megatron_lmeval_server
KlaudiaTH Aug 3, 2023
630a38f
Removed some comments from text_generation_server.py
KlaudiaTH Aug 3, 2023
0d7f8fd
Minor correction
KlaudiaTH Aug 3, 2023
7a536b8
integrated first methods of hf tokenizer
Aug 4, 2023
db0c036
added tokenizer
Aug 6, 2023
685be55
bugfix
Aug 7, 2023
12948f7
retrieve eod id from tokenizer
Aug 7, 2023
44d1bbb
bugfix 2
Aug 7, 2023
e6e6b75
bugfix 3
Aug 7, 2023
31ebade
bugfix 4
Aug 7, 2023
5773be8
bugfix 4
Aug 7, 2023
1cfe037
_HFTokenizer typo
Aug 7, 2023
2dd938d
added functions
Aug 8, 2023
abe9d7a
integrated pretrained hf tokenizer
Aug 11, 2023
c14fefe
Add metadata query
janEbert Aug 11, 2023
fa6c3fb
bugfix PretrainedHFTokenizer
Aug 11, 2023
0530610
bugfix
Aug 11, 2023
0c8461d
Merge remote-tracking branch 'origin/add-gptx-tokenizers' into megatr…
KlaudiaTH Aug 12, 2023
f42ded1
MegatronLM server API adaption. Example sh files.
KlaudiaTH Aug 12, 2023
341f53a
Adaptations for greedy until generation; minor fixes
KlaudiaTH Aug 16, 2023
5bba337
API and SP tokenizer adaptions for handling continuations
KlaudiaTH Aug 18, 2023
10d7fe8
Server: Don't return padding tokens
KlaudiaTH Aug 24, 2023
ac09fe4
Corrected is_max_logprobs slicing
KlaudiaTH Aug 29, 2023
526ec2a
Added option for padding to seq_len during tokenization and generation
KlaudiaTH Aug 29, 2023
22aa758
Minor fix
KlaudiaTH Aug 29, 2023
d704b30
Corrected monolingual bpe sp 32k example
KlaudiaTH Sep 5, 2023
ab63e91
Server: Add argument for specifying HTTP port
KlaudiaTH Oct 16, 2023
c0cb866
Merge branch 'main' into megatron_lmeval_server
KlaudiaTH Nov 3, 2023
5b214f9
training.py: Import vision modules only when needed
KlaudiaTH Nov 3, 2023
2 changes: 2 additions & 0 deletions .gitignore
@@ -6,3 +6,5 @@ build
*~
slurm*
logs
.vscode/
apex/
@@ -0,0 +1,45 @@
#!/bin/bash


export CUDA_DEVICE_MAX_CONNECTIONS=1
export TORCHELASTIC_ERROR_FILE=/tmp/torch-elastic-error.json
export NCCL_ASYNC_ERROR_HANDLING=1
export NCCL_IB_TIMEOUT=50
export UCX_RC_TIMEOUT=4s
export NCCL_IB_RETRY_CNT=10
export NCCL_SOCKET_IFNAME=ib0
export GLOO_SOCKET_IFNAME=ib0

# Rendezvous host: first allocated node; the "i" suffix selects its
# InfiniBand (IPoIB) hostname on JSC clusters, resolved to an IP via nslookup.
MASTER_ADDR="$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)"
MASTER_ADDR="${MASTER_ADDR}i"
MASTER_ADDR="$(nslookup "$MASTER_ADDR" | grep -oP '(?<=Address: ).*')"
MASTER_PORT=6000


DISTRIBUTED_ARGS="--nproc_per_node 2 \
--nnodes 1 \
--node_rank 0 \
--rdzv_endpoint $MASTER_ADDR:$MASTER_PORT \
--rdzv_backend c10d"


python -u -m torch.distributed.run $DISTRIBUTED_ARGS ./tasks/main.py \
--task "LAMBADA" \
--valid-data /p/project/opengptx-elm/thellmann1/opengpt_2023/data/bflm/lambada_test.jsonl \
--strict-lambada \
--tokenizer-type OpenGPTX-SPTokenizer \
--tokenizer-model /p/scratch/opengptx-elm/data/datasources_opgptx/data_quality_experiments_datasets/ablations_studies/monolingual_en/70B_10/tokenizer_training/bpe/sp/32768_10/bpe_tokenizer.model \
--load /p/scratch/opengptx-elm/ali5/opengpt/megatron-lm/2023-07-27_17-52-53/output_dir/2_6B_monolingual_eng-bpe_sp_32768_10_rotary.sh/checkpoints \
--tensor-model-parallel-size 2 \
--pipeline-model-parallel-size 1 \
--no-position-embedding \
--position-embedding-type rotary \
--num-layers 32 \
--hidden-size 2560 \
--num-attention-heads 32 \
--micro-batch-size 5 \
--global-batch-size 480 \
--seq-length 2048 \
--max-position-embeddings 2048 \
--log-interval 10 \
--bf16
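
This evaluation script derives MASTER_ADDR from SLURM_JOB_NODELIST, so it has to run inside a Slurm allocation. A minimal submission sketch; the script name, partition, and resource values are placeholders, not taken from this PR:

# Hypothetical wrapper; script name and partition are placeholders.
sbatch --nodes=1 --gres=gpu:2 --time=01:00:00 \
  --wrap 'bash examples/run_lambada_eval_2_6B.sh'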
48 changes: 48 additions & 0 deletions examples/run_text_generation_server_2_6B.sh
@@ -0,0 +1,48 @@
#!/bin/bash

set -x -e

export CUDA_DEVICE_MAX_CONNECTIONS=1
export TORCHELASTIC_ERROR_FILE=/tmp/torch-elastic-error.json
export NCCL_ASYNC_ERROR_HANDLING=1
export NCCL_IB_TIMEOUT=50
export UCX_RC_TIMEOUT=4s
export NCCL_IB_RETRY_CNT=10
export NCCL_SOCKET_IFNAME=ib0
export NCCL_DEBUG=INFO
export MASTER_ADDR=127.0.0.1
export MASTER_PORT=60234

export CMD=" \
tools/run_text_generation_server.py \
--load /p/scratch/opengptx-elm/ali5/opengpt/megatron-lm/2023-07-27_17-52-51/output_dir/2_6B_monolingual_eng-bpe_sp_32768_10_nope.sbatch/checkpoints/53100 \
--tokenizer-model /p/scratch/opengptx-elm/data/datasources_opgptx/data_quality_experiments_datasets/ablations_studies/monolingual_en/70B_10/tokenizer_training/bpe/sp/32768_10/bpe_tokenizer.model \
--tokenizer-type OpenGPTX-SPTokenizer \
--pipeline-model-parallel-size 1 \
--tensor-model-parallel-size 2 \
--num-layers 32 \
--hidden-size 2560 \
--num-attention-heads 32 \
--max-position-embeddings 2048 \
--bf16 \
--micro-batch-size 1 \
--seq-length 2048 \
--out-seq-length 2048 \
--temperature 0.8 \
--top_p 0.5 \
--seed 42 \
--position-embedding-type none \
--no-position-embedding \
--use-flash-attn \
--reset-attention-mask \
--reset-position-ids"


export LAUNCHER="python -u -m torch.distributed.run \
--nproc_per_node 2 \
--nnodes 1 \
--node_rank 0 \
--master_addr localhost \
--master_port 6000"

bash -c "$LAUNCHER $CMD"
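
Once the server above is up, it can be smoke-tested over HTTP. A sketch assuming the upstream Megatron-LM REST API shape (PUT on /api, default port 5000, JSON with prompts and tokens_to_generate); this PR adapts the API for lm-eval, so the exact fields may differ:

# Assumes the stock Megatron-LM text generation endpoint; adapt the JSON
# fields to the API as modified in this PR.
curl -s -X PUT http://localhost:5000/api \
  -H 'Content-Type: application/json' \
  -d '{"prompts": ["The capital of France is"], "tokens_to_generate": 16}'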
@@ -0,0 +1,51 @@
#!/bin/bash

set -x -e

export CUDA_DEVICE_MAX_CONNECTIONS=1
export TORCHELASTIC_ERROR_FILE=/tmp/torch-elastic-error.json
export NCCL_ASYNC_ERROR_HANDLING=1
export NCCL_IB_TIMEOUT=50
export UCX_RC_TIMEOUT=4s
export NCCL_IB_RETRY_CNT=10
export NCCL_SOCKET_IFNAME=ib0
export NCCL_DEBUG=INFO
export MASTER_ADDR=127.0.0.1
export MASTER_PORT=60234
export MAX_JOBS=$SLURM_JOB_CPUS_PER_NODE

# Resolve this script's directory (the zsh-style fallback keeps it sourceable from zsh).
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]:-${(%):-%x}}" )" &> /dev/null && pwd )


export CMD=" \
$SCRIPT_DIR/../tools/run_text_generation_server.py \
--load /beegfs/p_gptx/tokenizer_study/cp_2_6B_iter_0053100/checkpoints/2_6B_monolingual_eng-bpe_hf_32768_10_rotary.sh/checkpoints \
--tokenizer-model /beegfs/p_gptx/tokenizer_study/2_6B_tokenizer_models/2_6B_monolingual_eng-bpe_hf_32768_10_rotary.sh/tokenizer/iter_0053100/tokenizer.json \
--tokenizer-type OpenGPTX-HFTokenizer \
--pipeline-model-parallel-size 1 \
--tensor-model-parallel-size 2 \
--num-layers 32 \
--hidden-size 2560 \
--num-attention-heads 32 \
--max-position-embeddings 2048 \
--bf16 \
--micro-batch-size 5 \
--seq-length 2048 \
--out-seq-length 2048 \
--temperature 0.8 \
--top_p 0.5 \
--seed 42 \
--position-embedding-type rotary \
--no-position-embedding \
"

# torch.distributed.launch is deprecated in favor of torch.distributed.run,
# which the other example scripts already use.
export LAUNCHER="python -u -m torch.distributed.run \
--nproc_per_node 2 \
--nnodes 1 \
--node_rank 0 \
--master_addr localhost \
--master_port 6000"


bash -c "$LAUNCHER $CMD"
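
Before launching, it can be worth confirming that the tokenizer.json file loads with the HuggingFace tokenizers library, which OpenGPTX-HFTokenizer presumably wraps; a quick sanity check using the path from the script above:

# Sanity-check the HF tokenizer file; requires the `tokenizers` package.
python - <<'EOF'
from tokenizers import Tokenizer
tok = Tokenizer.from_file(
    "/beegfs/p_gptx/tokenizer_study/2_6B_tokenizer_models/2_6B_monolingual_eng-bpe_hf_32768_10_rotary.sh/tokenizer/iter_0053100/tokenizer.json")
print(tok.encode("Hello world").ids)
EOF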

@@ -0,0 +1,50 @@
#!/bin/bash

set -x -e

export CUDA_DEVICE_MAX_CONNECTIONS=1
export TORCHELASTIC_ERROR_FILE=/tmp/torch-elastic-error.json
export NCCL_ASYNC_ERROR_HANDLING=1
export NCCL_IB_TIMEOUT=50
export UCX_RC_TIMEOUT=4s
export NCCL_IB_RETRY_CNT=10
export NCCL_SOCKET_IFNAME=ib0
export NCCL_DEBUG=INFO
export MASTER_ADDR=127.0.0.1
export MASTER_PORT=60234
export MAX_JOBS=$SLURM_JOB_CPUS_PER_NODE

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]:-${(%):-%x}}" )" &> /dev/null && pwd )


export CMD=" \
$SCRIPT_DIR/../tools/run_text_generation_server.py \
--load /p/scratch/opengptx-elm/ali5/opengpt/megatron-lm/2023-07-31_16-09-58/output_dir/2_6B_monolingual_eng-bpe_hf_100352_10_rotary.sh/checkpoints \
--tokenizer-model /p/scratch/opengptx-elm/data/datasources_opgptx/data_quality_experiments_datasets/ablations_studies/monolingual_en/70B_10/tokenizer_training/bpe/sp/100352_10/bpe_tokenizer.model \
--tokenizer-type OpenGPTX-SPTokenizer \
--pipeline-model-parallel-size 1 \
--tensor-model-parallel-size 2 \
--num-layers 32 \
--hidden-size 2560 \
--num-attention-heads 32 \
--max-position-embeddings 2048 \
--bf16 \
--micro-batch-size 5 \
--seq-length 2048 \
--out-seq-length 2048 \
--temperature 0.8 \
--top_p 0.5 \
--seed 42 \
--position-embedding-type rotary \
--no-position-embedding \
"

export LAUNCHER="python -u -m torch.distributed.run \
--nproc_per_node 2 \
--nnodes 1 \
--node_rank 0 \
--master_addr localhost \
--master_port 6000"


bash -c "$LAUNCHER $CMD"
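
The SentencePiece-based variants can be checked the same way with the sentencepiece package, which is assumed to back OpenGPTX-SPTokenizer:

# Sanity-check the SP model; requires the `sentencepiece` package.
python - <<'EOF'
import sentencepiece as spm
sp = spm.SentencePieceProcessor(
    model_file="/p/scratch/opengptx-elm/data/datasources_opgptx/data_quality_experiments_datasets/ablations_studies/monolingual_en/70B_10/tokenizer_training/bpe/sp/32768_10/bpe_tokenizer.model")
print(sp.encode("Hello world"))
EOF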
@@ -0,0 +1,50 @@
#!/bin/bash

set -x -e

export CUDA_DEVICE_MAX_CONNECTIONS=1
export TORCHELASTIC_ERROR_FILE=/tmp/torch-elastic-error.json
export NCCL_ASYNC_ERROR_HANDLING=1
export NCCL_IB_TIMEOUT=50
export UCX_RC_TIMEOUT=4s
export NCCL_IB_RETRY_CNT=10
export NCCL_SOCKET_IFNAME=ib0
export NCCL_DEBUG=INFO
export MASTER_ADDR=127.0.0.1
export MASTER_PORT=60234
export MAX_JOBS=$SLURM_JOB_CPUS_PER_NODE

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]:-${(%):-%x}}" )" &> /dev/null && pwd )


export CMD=" \
$SCRIPT_DIR/../tools/run_text_generation_server.py \
--load /p/scratch/opengptx-elm/ali5/opengpt/megatron-lm/2023-07-27_17-52-51/output_dir/2_6B_monolingual_eng-bpe_sp_32768_10_nope.sbatch/checkpoints \
--tokenizer-model /p/scratch/opengptx-elm/data/datasources_opgptx/data_quality_experiments_datasets/ablations_studies/monolingual_en/70B_10/tokenizer_training/bpe/sp/32768_10/bpe_tokenizer.model \
--tokenizer-type OpenGPTX-SPTokenizer \
--pipeline-model-parallel-size 1 \
--tensor-model-parallel-size 2 \
--num-layers 32 \
--hidden-size 2560 \
--num-attention-heads 32 \
--max-position-embeddings 2048 \
--bf16 \
--micro-batch-size 5 \
--seq-length 2048 \
--out-seq-length 2048 \
--temperature 0.8 \
--top_p 0.5 \
--seed 42 \
--position-embedding-type none \
--no-position-embedding \
"

export LAUNCHER="python -u -m torch.distributed.run \
--nproc_per_node 2 \
--nnodes 1 \
--node_rank 0 \
--master_addr localhost \
--master_port 6000"


bash -c "$LAUNCHER $CMD"
@@ -0,0 +1,81 @@
#!/bin/bash

set -x -e

export CUDA_DEVICE_MAX_CONNECTIONS=1
export TORCHELASTIC_ERROR_FILE=/tmp/torch-elastic-error.json
export NCCL_ASYNC_ERROR_HANDLING=1
export NCCL_IB_TIMEOUT=50
export UCX_RC_TIMEOUT=4s
export NCCL_IB_RETRY_CNT=10
export NCCL_SOCKET_IFNAME=ib0
export NCCL_DEBUG=INFO
export MASTER_ADDR=127.0.0.1
export MASTER_PORT=60234
export MAX_JOBS=$SLURM_JOB_CPUS_PER_NODE

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]:-${(%):-%x}}" )" &> /dev/null && pwd )


export CMD=" \
$SCRIPT_DIR/../tools/run_text_generation_server.py \
--load /p/scratch/opengptx-elm/ali5/opengpt/megatron-lm/2023-07-27_17-52-53/output_dir/2_6B_monolingual_eng-bpe_sp_32768_10_rotary.sh/checkpoints \
--out-seq-length 2048 \
--temperature 0.8 \
--top_p 0.5 \
--tensor-model-parallel-size 2 \
--pipeline-model-parallel-size 1 \
--max-tokens-to-oom=300000 \
--num-layers 32 \
--hidden-size 2560 \
--num-attention-heads 32 \
--seq-length 2048 \
--max-position-embeddings 2048 \
--micro-batch-size 5 \
--global-batch-size 480 \
--train-samples 25_488_281 \
--tokenizer-type OpenGPTX-SPTokenizer \
--tokenizer-model /p/scratch/opengptx-elm/data/datasources_opgptx/data_quality_experiments_datasets/ablations_studies/monolingual_en/70B_10/tokenizer_training/bpe/sp/32768_10/bpe_tokenizer.model \
--init-method-std 0.02 \
--bf16 \
--seed 42 \
--no-position-embedding \
--position-embedding-type rotary \
--optimizer adam \
--adam-beta1 0.9 \
--adam-beta2 0.95 \
--adam-eps 1e-8 \
--lr 1.6e-4 \
--min-lr 1.6e-5 \
--lr-decay-style cosine \
--lr-decay-samples 22_089_843 \
--lr-warmup-samples 31_860 \
--clip-grad 1.0 \
--weight-decay 1e-1 \
--use-distributed-optimizer \
--log-interval 100 \
--log-memory-to-tensorboard \
--log-world-size-to-tensorboard \
--save-interval 3000 \
--eval-interval 3000 \
--eval-iters 1 \
--tensorboard-dir /p/scratch/opengptx-elm/ali5/opengpt/megatron-lm/2023-07-27_17-52-53/output_dir/2_6B_monolingual_eng-bpe_sp_32768_10_rotary.sh/tensorboard \
--tensorboard-queue-size 5 \
--log-timers-to-tensorboard \
--log-batch-size-to-tensorboard \
--log-validation-ppl-to-tensorboard \
--num-workers 11 \
--data-impl mmap \
--distributed-backend nccl \
"

export LAUNCHER="python -u -m torch.distributed.run \
--nproc_per_node 2 \
--nnodes 1 \
--node_rank 0 \
--master_addr localhost \
--master_port 6000"


bash -c "$LAUNCHER $CMD"
@@ -0,0 +1,50 @@
#!/bin/bash

set -x -e

export CUDA_DEVICE_MAX_CONNECTIONS=1
export TORCHELASTIC_ERROR_FILE=/tmp/torch-elastic-error.json
export NCCL_ASYNC_ERROR_HANDLING=1
export NCCL_IB_TIMEOUT=50
export UCX_RC_TIMEOUT=4s
export NCCL_IB_RETRY_CNT=10
export NCCL_SOCKET_IFNAME=ib0
export NCCL_DEBUG=INFO
export MASTER_ADDR=127.0.0.1
export MASTER_PORT=60234
export MAX_JOBS=$SLURM_JOB_CPUS_PER_NODE

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]:-${(%):-%x}}" )" &> /dev/null && pwd )


export CMD=" \
$SCRIPT_DIR/../tools/run_text_generation_server.py \
--load /p/scratch/opengptx-elm/ali5/opengpt/megatron-lm/2023-07-27_17-52-53/output_dir/2_6B_multilingual-bpe_sp_32768_10_rotary.sh/checkpoints \
--tokenizer-model /p/scratch/opengptx-elm/data/datasources_opgptx/data_quality_experiments_datasets/ablations_studies/multilingual/70B_EQW_10/tokenizer_training/bpe/sp/32768_10/bpe_tokenizer.model \
--tokenizer-type OpenGPTX-SPTokenizer \
--pipeline-model-parallel-size 1 \
--tensor-model-parallel-size 2 \
--num-layers 32 \
--hidden-size 2560 \
--num-attention-heads 32 \
--max-position-embeddings 2048 \
--bf16 \
--micro-batch-size 5 \
--seq-length 2048 \
--out-seq-length 2048 \
--temperature 0.8 \
--top_p 0.5 \
--seed 42 \
--position-embedding-type rotary \
--no-position-embedding \
"

export LAUNCHER="python -u -m torch.distributed.run \
--nproc_per_node 2 \
--nnodes 1 \
--node_rank 0 \
--master_addr localhost \
--master_port 6000"


bash -c "$LAUNCHER $CMD"
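
Commit ab63e91 adds an argument for choosing the server's HTTP port, which makes it possible to serve two checkpoints side by side on one node. A sketch of how that could look from within one of the scripts above; the --port spelling, GPU split, and port numbers are assumptions, not taken from the diff:

# Hypothetical two-instance launch; each instance gets its own GPUs,
# rendezvous port, and HTTP port (--port spelling assumed from commit ab63e91).
for i in 0 1; do
  CUDA_VISIBLE_DEVICES="$((2*i)),$((2*i+1))" \
    python -u -m torch.distributed.run \
      --nproc_per_node 2 --nnodes 1 --node_rank 0 \
      --master_addr localhost --master_port $((6000 + i)) \
      $CMD --port $((5000 + i)) &
done
wait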