forked from xingyaoww/code-act
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrun.sh
More file actions
executable file
·40 lines (30 loc) · 980 Bytes
/
run.sh
File metadata and controls
executable file
·40 lines (30 loc) · 980 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/usr/bin/env bash
# Runs evaluator.py from scripts/eval/api-bank for a chosen model.
# The path below is relative, so invoke this script from a directory that
# contains scripts/eval/api-bank.

# Abort if the directory change fails; otherwise results/ would be created,
# and the evaluation would run, in whatever directory the caller was in.
pushd scripts/eval/api-bank || exit 1
# Output directory handed to evaluator.py via --output_dir.
mkdir -p results || exit 1
#######################################
# Run evaluator.py for one model under every action mode.
# Invokes evaluator.py once each for text_as_action, json_as_action and
# code_as_action, in that order. The loop does not inspect the exit status
# of individual runs.
# Arguments: $1 - model name, forwarded to evaluator.py via --model_name
# Outputs:   progress banners on stdout; usage error on stderr
# Returns:   1 if the model name is missing or empty, otherwise the exit
#            status of the last evaluator.py invocation
#######################################
run_eval() {
  if [[ -z "${1:-}" ]]; then
    echo "run_eval: missing model name" >&2
    return 1
  fi

  # Keep working variables local so they do not leak into the caller's shell.
  local model_name=$1
  local action_mode

  echo "Evaluating $model_name"
  for action_mode in "text_as_action" "json_as_action" "code_as_action"; do
    echo "===================="
    echo "action_mode: $action_mode"
    # Expansions are quoted so a model name containing spaces or glob
    # characters reaches evaluator.py as exactly one argument.
    python3 evaluator.py \
      --data_dir lv1-lv2-samples/level-1-given-desc \
      --action_mode "$action_mode" \
      --model_name "$model_name" \
      --output_dir results \
      --api_test_enabled
  done
}
# run_eval gpt-3.5-turbo-0613
# run_eval gpt-3.5-turbo-1106
# run_eval gpt-4-0613
# run_eval gpt-4-1106-preview
# run_eval text-davinci-003
# run_eval text-davinci-002
# run_eval claude-instant-1 # claude-instant-1.2
# run_eval claude-2 # claude-2.1
# run_eval gemini-pro
# NOTE: You may change the API endpoint to your own OpenAI-compatible API endpoint,
# e.g. one served with vLLM: https://vllm.ai
# Export the API base URL to child processes (presumably read by the
# evaluator's OpenAI client -- confirm), then evaluate the chosen model.
# YOUR_API and YOUR_MODEL are placeholders to replace before running.
OPENAI_API_BASE="http://YOUR_API:8888/v1"
export OPENAI_API_BASE
run_eval "YOUR_MODEL"
# Return to the directory that was current before the pushd at the top.
popd