Skip to content

Commit 0eed353

Browse files
committed
wip
1 parent 2e1adcb commit 0eed353

File tree

12 files changed

+669
-35
lines changed

12 files changed

+669
-35
lines changed

.env.example

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,3 +73,7 @@ LANGFUSE_HOST="this_is_not_a_secret"
7373
# Misc
7474

7575
CI=""
76+
77+
# For CI and running CLI commands as demo org
78+
LOCAL_CREDENTIALS_ORG_OPENAI_API_KEY=""
79+
LOCAL_CREDENTIALS_API_KEY=""

Taskfile.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
version: '3'
2+
3+
includes:
4+
backend:
5+
taskfile: ./backend/Taskfile.yml
6+
dir: ./backend

backend/Taskfile.yml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
version: '3'
2+
3+
tasks:
4+
eval:
5+
desc: Run evaluation on kunji dataset
6+
dotenv:
7+
- ../.env
8+
cmds:
9+
- uv run ai-cli eval responses {{.CLI_ARGS}}
10+
11+
seed:
12+
desc: Seed the database with initial data
13+
dotenv:
14+
- ../.env
15+
cmds:
16+
- uv run python -m app.seed_data.seed_data
17+
18+
logs:
19+
desc: Show backend logs from docker compose
20+
cmds:
21+
- docker compose -f ../docker-compose.yml logs -f backend

backend/app/cli/bench/commands.py

Lines changed: 4 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import os
22
import csv
3-
import json
43
import logging
54
import time
65
from datetime import datetime
@@ -11,6 +10,8 @@
1110
import typer
1211
from tqdm import tqdm
1312

13+
from ..utils import estimate_cost
14+
1415
logging.basicConfig(
1516
level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s - %(message)s"
1617
)
@@ -45,7 +46,7 @@ class ResponsesDatasetConfig:
4546

4647

4748
def load_instructions(filename: str) -> str:
48-
with open(os.path.join(os.path.dirname(__file__), "data", filename), "r") as file:
49+
with open(os.path.join(os.path.dirname(__file__), "..", "data", filename), "r") as file:
4950
return file.read()
5051

5152

@@ -99,38 +100,6 @@ class BenchItem:
99100
model: str
100101

101102

102-
def estimate_cost(model: str, input_tokens: int, output_tokens: int) -> float:
103-
GPT_4o_MINI_2024_07_18_COSTING = {
104-
"input": 0.15,
105-
"cached_input": 0.075,
106-
"output": 0.60,
107-
}
108-
109-
GPT_4o_2024_08_06_COSTING = {
110-
"input": 2.50,
111-
"cached_input": 1.25,
112-
"output": 10.00,
113-
}
114-
115-
usd_per_1m = {
116-
"gpt-4o": GPT_4o_2024_08_06_COSTING,
117-
"gpt-4o-2024-08-06": GPT_4o_2024_08_06_COSTING,
118-
"gpt-4o-mini": GPT_4o_MINI_2024_07_18_COSTING,
119-
"gpt-4o-mini-2024-07-18": GPT_4o_MINI_2024_07_18_COSTING,
120-
# Extend with more models as needed: https://platform.openai.com/docs/pricing
121-
}
122-
123-
pricing = usd_per_1m.get(model.lower())
124-
if not pricing:
125-
logging.warning(f"No pricing found for model '{model}'. Returning cost = 0.")
126-
return 0.0
127-
128-
# We don't account for cached_input for now; this is just to be mindful of the upper-bound cost of running the benchmark
129-
input_cost = (input_tokens / 1_000_000) * pricing["input"]
130-
output_cost = (output_tokens / 1_000_000) * pricing["output"]
131-
return input_cost + output_cost
132-
133-
134103
def output_csv(items: List[BenchItem]):
135104
filename = f"bench_results_{datetime.now().strftime('%Y%m%d%H%M%S')}.csv"
136105
file_exists = os.path.exists(filename)
@@ -224,7 +193,7 @@ def load_and_dedupe_csv(
224193
) -> List[dict]:
225194
"""Load and deduplicate CSV data for benchmarking."""
226195
csv_file_path = os.path.join(
227-
os.path.dirname(__file__), "data", dataset_config.filename
196+
os.path.dirname(__file__), "..", "data", dataset_config.filename
228197
)
229198
with open(csv_file_path, "r") as file:
230199
csv_reader = csv.DictReader(file)
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.

backend/app/cli/eval/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)