forked from nii-yamagishilab/project-NN-Pytorch-scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path01_train.sh
74 lines (65 loc) · 2.04 KB
/
01_train.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/bin/bash
########################
# Script for training
# Usage:
# 1. please check that config.py has been properly configured
# 2. $: bash 00_train.sh SEED CONFIG_NAME > /dev/null 2>&1 &
# 4. please check log_train and log_err to monitor the training
# process
########################
# the random seed
SEED=$1
# the name of the training config file
CONFIG=$2
# folder name
model_dir=$3
log_train_name=log_train
log_err_name=log_train_err
####
# step1. copy files & enter the folder
if [ -d ${model_dir} ];
then
cp ./main.py ${model_dir}
cp ./${CONFIG}.py ${model_dir}
cp ${model_dir}/../../model.py ${model_dir}
cd ${model_dir}
else
echo "Cannot find ${model_dir}"
exit 1;
fi
####
# step2. decide whether this model requires SSL fine-tune
FINETUNE=`echo ${model_dir} | grep ft | wc -l`
if [ ${FINETUNE} -gt 0 ];
then
echo "Training process will fine-tune SSL"
# command to train model with SSL fine-tuned
# the learning rate and batch size are different
com="python main.py --model-forward-with-file-name
--num-workers 8 --epochs 100
--no-best-epochs 10 --batch-size 8
--sampler block_shuffle_by_length --lr-decay-factor 0.5
--lr-scheduler-type 1 --lr 0.000001
--not-save-each-epoch --seed ${SEED}
--module-config ${CONFIG}
>${log_train_name} 2>${log_err_name}"
else
echo "Training process use fixed SSL, no fine-tuning"
# command to train model without fine-tuning SSL
com="python main.py --model-forward-with-file-name
--num-workers 3 --epochs 100
--no-best-epochs 10 --batch-size 64
--sampler block_shuffle_by_length --lr-decay-factor 0.5
--lr-scheduler-type 1 --lr 0.0003
--not-save-each-epoch --seed ${SEED}
--module-config ${CONFIG}
>${log_train_name} 2>${log_err_name}"
fi
####
# step 3. training
echo -e "Training starts"
echo -e "Please monitor the log trainig: $PWD/${log_train_name}\n"
echo ${com}
eval ${com}
echo -e "Training process finished"
echo -e "Trainig log has been written to $PWD/${log_train_name}"