We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent e58413a commit add4651 Copy full SHA for add4651
bigcodebench/evaluate.py
@@ -283,7 +283,8 @@ def stucking_checker():
283
json.dump(results, f, indent=2)
284
285
pass_at_k_path = result_path.replace("_eval_results.json", "_pass_at_k.json")
286
- pass_at_k["model"] = flags.samples.split("/")[-1].replace(".jsonl", "")
+ pass_at_k["model"] = os.path.basename(flags.samples).split("--bigcodebench-")[0]
287
+ pass_at_k["calibrated"] = "sanitized-calibrated" in flags.samples
288
pass_at_k["subset"] = flags.subset
289
290
def save_pass_at_k():
0 commit comments