forked from SWE-agent/SWE-agent
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_and_eval.sh
29 lines (25 loc) · 1.03 KB
/
run_and_eval.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
#!/bin/bash
# Runs the agent and then evaluates its predictions, N times in a row.
#
# Usage:
#   bash run_and_eval.sh <suffix> <template> <dataset_path> <num_runs>
# Example:
#   bash run_and_eval.sh '' default_with_inclusive_edit_demo_v2 data/dev-easy/swe-bench-dev-easy-med.json 3
#
# Arguments:
#   suffix        prepended to the per-run suffix "<suffix>run<i>" (may be '')
#   template      config name; config/configs/<template>.yaml is given to run.py
#   dataset_path  given to run.py as --data_path and to the evaluation as
#                 --swe_bench_tasks
#   num_runs      number of run + evaluate iterations (non-negative integer)

#######################################
# Prints the predictions file path that is handed to the evaluation step.
# Globals:   USER (read)
# Arguments: $1 - dataset file name (basename of the dataset path)
#            $2 - template name
#            $3 - per-run suffix, e.g. "run1"
# Outputs:   the path on stdout
#######################################
predictions_path() {
  local dataset_name=$1 template=$2 run_suffix=$3
  local run_dir
  # The braces around ${template} are required: a bare "$template__t" is read
  # by the shell as the (unset) variable "template__t" and expands to nothing.
  # NOTE(review): dataset_name keeps its file extension here, as it always
  # did; confirm this matches the directory name run.py actually creates.
  run_dir="gpt4__${dataset_name}__${template}__t-0.20__p-0.95__c-3.00__install-1__${run_suffix}"
  printf '%s\n' "trajectories/${USER}/${run_dir}/all_preds.jsonl"
}

#######################################
# Runs run.py followed by evaluation/evaluation.py once per requested run.
# Arguments: $1 - suffix, $2 - template, $3 - dataset path, $4 - number of runs
# Returns:   2 on invalid arguments; otherwise the status of the last command
#######################################
run_and_eval() {
  if (( $# < 4 )); then
    printf 'Usage: %s <suffix> <template> <dataset_path> <num_runs>\n' \
      "${0##*/}" >&2
    return 2
  fi

  local suffix=${1:-}
  local template=$2
  local dataset_path=$3
  local num_runs=$4

  if [[ ! "$num_runs" =~ ^[0-9]+$ ]]; then
    printf 'num_runs must be a non-negative integer, got: %s\n' "$num_runs" >&2
    return 2
  fi
  # Force base 10 so a value such as "08" is not rejected as invalid octal.
  num_runs=$((10#$num_runs))

  # Extract the file name from the dataset path.
  local dataset_name
  dataset_name=$(basename -- "$dataset_path")

  local i run_suffix
  for ((i = 1; i <= num_runs; i++)); do
    run_suffix="${suffix}run${i}"

    # Command 1: run the agent on the dataset.
    python run.py \
      --model_name gpt4 \
      --data_path "$dataset_path" \
      --config_file "config/configs/${template}.yaml" \
      --suffix "$run_suffix" \
      --temperature 0.2 \
      --top_p 0.95 \
      --per_instance_cost_limit 3.00 \
      --install_environment 1

    # Command 2: evaluate the predictions of that run.
    python evaluation/evaluation.py \
      --predictions_path "$(predictions_path "$dataset_name" "$template" "$run_suffix")" \
      --swe_bench_tasks "$dataset_path" \
      --log_dir ./results \
      --testbed ./testbed \
      --skip_existing \
      --timeout 900 \
      --verbose
  done
}

run_and_eval "$@"