-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy patheval_checkpoint_gold.sh
51 lines (44 loc) · 2 KB
/
eval_checkpoint_gold.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#!/bin/bash
# Requested resources
#SBATCH --mem=32G
#SBATCH --cpus-per-task=1
#SBATCH --gres=gpu:a100:1
# Wall time and job details
#SBATCH --time=5:00:00
#SBATCH --job-name=eval-checkpoint-gold
#SBATCH --account=def-wanglab-ab
# Use this command to run the same job interactively
# salloc --mem=32G --cpus-per-task=1 --gres=gpu:a100:1 --time=3:00:00 --account=def-wanglab-ab
# salloc --mem=32G --cpus-per-task=1 --gres=gpu:a100:1 --time=3:00:00 --account=def-gbader
### Example usage ###
# Note that this script is intended to be called by submit_eval_checkpoint.sh!
#
# sbatch "./scripts/slurm/eval_checkpoint_gold.sh" "./conf/multinews/primera/eval.yml" \
# "./output/results/multinews/primera/trained_with_retrieval/checkpoint-702" \
# "./output/multinews/primera/training/checkpoint-702"
### Usage notes ###
# The amount of time needed will depend on the model and dataset, but it should be ~5 hours or less.
### Environment ###
# Add your W&B key here to enable W&B reporting (or login with wandb login)
# export WANDB_API_KEY=""
# Start from a clean module environment, as suggested in the alliancecan docs:
# https://docs.alliancecan.ca/wiki/Running_jobs
module purge
module load StdEnv/2020 gcc/9.3.0 python/3.9 arrow/8.0.0
PROJECT_NAME="open-mds"
# Abort if the virtualenv cannot be activated; otherwise every later command
# would silently run outside the project's Python environment.
source "$HOME/$PROJECT_NAME/bin/activate" || {
    echo "error: could not activate virtualenv at $HOME/$PROJECT_NAME" >&2
    exit 1
}
# Relative paths used later in the script are resolved from the project root,
# so failing to enter it is fatal.
cd "$HOME/projects/def-gbader/$USER/$PROJECT_NAME" || {
    echo "error: could not cd to $HOME/projects/def-gbader/$USER/$PROJECT_NAME" >&2
    exit 1
}
### Script arguments ###
# Required arguments. A missing or empty argument aborts the job immediately with the message shown,
# instead of forwarding an empty override (e.g. `output_dir=`) to the python script.
CONFIG_FILEPATH="${1:?missing argument 1: path on disk to the yml config file}"
OUTPUT_DIR="${2:?missing argument 2: path on disk to save the output to}"
MODEL_NAME_OR_PATH="${3:?missing argument 3: name (or path on disk) of the model to evaluate}"
### Job ###
# This calls a modified version of the example summarization script from HF (with Trainer). For details,
# see: https://github.com/huggingface/transformers/tree/main/examples/pytorch/summarization#with-trainer
#
# The three VAR=value prefixes apply to this python process only; they put W&B, transformers and
# datasets into offline mode (presumably because the compute nodes have no internet access -- confirm).
# The key=value arguments after the two config files set output_dir, model_name_or_path and a fixed seed.
WANDB_MODE=offline \
TRANSFORMERS_OFFLINE=1 \
HF_DATASETS_OFFLINE=1 \
python "./scripts/run_summarization.py" "./conf/base.yml" "$CONFIG_FILEPATH" \
    output_dir="$OUTPUT_DIR" \
    model_name_or_path="$MODEL_NAME_OR_PATH" \
    seed="42"
# A bare `exit` ends the script with the exit status of the python command above.
exit