|
1 | 1 | import os |
2 | 2 | import csv |
3 | | -import json |
4 | 3 | import logging |
5 | 4 | import time |
6 | 5 | from datetime import datetime |
|
11 | 10 | import typer |
12 | 11 | from tqdm import tqdm |
13 | 12 |
|
| 13 | +from ..utils import estimate_cost |
| 14 | + |
14 | 15 | logging.basicConfig( |
15 | 16 | level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s - %(message)s" |
16 | 17 | ) |
@@ -45,7 +46,7 @@ class ResponsesDatasetConfig: |
45 | 46 |
|
46 | 47 |
|
47 | 48 | def load_instructions(filename: str) -> str: |
48 | | - with open(os.path.join(os.path.dirname(__file__), "data", filename), "r") as file: |
| 49 | + with open(os.path.join(os.path.dirname(__file__), "..", "data", filename), "r") as file: |
49 | 50 | return file.read() |
50 | 51 |
|
51 | 52 |
|
@@ -99,38 +100,6 @@ class BenchItem: |
99 | 100 | model: str |
100 | 101 |
|
101 | 102 |
|
102 | | -def estimate_cost(model: str, input_tokens: int, output_tokens: int) -> float: |
103 | | - GPT_4o_MINI_2024_07_18_COSTING = { |
104 | | - "input": 0.15, |
105 | | - "cached_input": 0.075, |
106 | | - "output": 0.60, |
107 | | - } |
108 | | - |
109 | | - GPT_4o_2024_08_06_COSTING = { |
110 | | - "input": 2.50, |
111 | | - "cached_input": 1.25, |
112 | | - "output": 10.00, |
113 | | - } |
114 | | - |
115 | | - usd_per_1m = { |
116 | | - "gpt-4o": GPT_4o_2024_08_06_COSTING, |
117 | | - "gpt-4o-2024-08-06": GPT_4o_2024_08_06_COSTING, |
118 | | - "gpt-4o-mini": GPT_4o_MINI_2024_07_18_COSTING, |
119 | | - "gpt-4o-mini-2024-07-18": GPT_4o_MINI_2024_07_18_COSTING, |
120 | | - # Extend with more models as needed: https://platform.openai.com/docs/pricing |
121 | | - } |
122 | | - |
123 | | - pricing = usd_per_1m.get(model.lower()) |
124 | | - if not pricing: |
125 | | - logging.warning(f"No pricing found for model '{model}'. Returning cost = 0.") |
126 | | - return 0.0 |
127 | | - |
128 | | - # We don't care about cached_input for now; this is just to be mindful of the upper-bound cost to run the benchmark |
129 | | - input_cost = (input_tokens / 1_000_000) * pricing["input"] |
130 | | - output_cost = (output_tokens / 1_000_000) * pricing["output"] |
131 | | - return input_cost + output_cost |
132 | | - |
133 | | - |
134 | 103 | def output_csv(items: List[BenchItem]): |
135 | 104 | filename = f"bench_results_{datetime.now().strftime('%Y%m%d%H%M%S')}.csv" |
136 | 105 | file_exists = os.path.exists(filename) |
@@ -224,7 +193,7 @@ def load_and_dedupe_csv( |
224 | 193 | ) -> List[dict]: |
225 | 194 | """Load and deduplicate CSV data for benchmarking.""" |
226 | 195 | csv_file_path = os.path.join( |
227 | | - os.path.dirname(__file__), "data", dataset_config.filename |
| 196 | + os.path.dirname(__file__), "..", "data", dataset_config.filename |
228 | 197 | ) |
229 | 198 | with open(csv_file_path, "r") as file: |
230 | 199 | csv_reader = csv.DictReader(file) |
|
0 commit comments