Skip to content

Commit 4c79164

Browse files
committed
Deployed f99c131 with MkDocs version: 1.6.1
1 parent 6dbd478 commit 4c79164

File tree

3 files changed

+17
-17
lines changed

3 files changed

+17
-17
lines changed

analysis/index.html

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -401,7 +401,7 @@ <h2 id="leaderboard-lite">Leaderboard (lite)</h2>
401401
<tr>
402402
<th>Name</th>
403403
<th style="text-align: center;">Repos Resolved (/16)</th>
404-
<th style="text-align: center;">Total Tests Passed (/3628)</th>
404+
<th style="text-align: center;">% Tests Passed (Total: 3628)</th>
405405
<th style="text-align: center;">Test Duration (s)</th>
406406
<th style="text-align: center;">Date</th>
407407
<th>Analysis</th>
@@ -412,7 +412,7 @@ <h2 id="leaderboard-lite">Leaderboard (lite)</h2>
412412
<tr>
413413
<td>Reference (Gold)</td>
414414
<td style="text-align: center;">10</td>
415-
<td style="text-align: center;">3628</td>
415+
<td style="text-align: center;">100.00%</td>
416416
<td style="text-align: center;">21.00</td>
417417
<td style="text-align: center;">NA</td>
418418
<td><a href="/analysis_commit0_lite_reference">Analysis</a></td>
@@ -421,7 +421,7 @@ <h2 id="leaderboard-lite">Leaderboard (lite)</h2>
421421
<tr>
422422
<td>OpenHands (subset of <code>all</code>)</td>
423423
<td style="text-align: center;">2</td>
424-
<td style="text-align: center;">3217</td>
424+
<td style="text-align: center;">88.67%</td>
425425
<td style="text-align: center;">408.27</td>
426426
<td style="text-align: center;">11/25/2024</td>
427427
<td><a href="/analysis_openhands-commit0_openhands">Analysis</a></td>
@@ -430,7 +430,7 @@ <h2 id="leaderboard-lite">Leaderboard (lite)</h2>
430430
<tr>
431431
<td>Claude Sonnet 3.5 - Fill-in (subset of <code>all</code>)</td>
432432
<td style="text-align: center;">0</td>
433-
<td style="text-align: center;">694</td>
433+
<td style="text-align: center;">19.13%</td>
434434
<td style="text-align: center;">629.19</td>
435435
<td style="text-align: center;">09/25/2024</td>
436436
<td><a href="/analysis_commit0-all-plain_fillin">Analysis</a></td>
@@ -439,7 +439,7 @@ <h2 id="leaderboard-lite">Leaderboard (lite)</h2>
439439
<tr>
440440
<td>Claude Sonnet 3.5 - Fill-in + Unit Test Feedback</td>
441441
<td style="text-align: center;">0</td>
442-
<td style="text-align: center;">619</td>
442+
<td style="text-align: center;">17.06%</td>
443443
<td style="text-align: center;">552.79</td>
444444
<td style="text-align: center;">09/25/2024</td>
445445
<td><a href="/analysis_commit0-lite-with-test_fillin">Analysis</a></td>
@@ -448,7 +448,7 @@ <h2 id="leaderboard-lite">Leaderboard (lite)</h2>
448448
<tr>
449449
<td>Claude Sonnet 3.5 - Fill-in</td>
450450
<td style="text-align: center;">0</td>
451-
<td style="text-align: center;">381</td>
451+
<td style="text-align: center;">10.50%</td>
452452
<td style="text-align: center;">22.47</td>
453453
<td style="text-align: center;">09/25/2024</td>
454454
<td><a href="/analysis_commit0-lite-plain_fillin">Analysis</a></td>
@@ -457,7 +457,7 @@ <h2 id="leaderboard-lite">Leaderboard (lite)</h2>
457457
<tr>
458458
<td>Claude Sonnet 3.5 - Base</td>
459459
<td style="text-align: center;">0</td>
460-
<td style="text-align: center;">376</td>
460+
<td style="text-align: center;">10.36%</td>
461461
<td style="text-align: center;">16.83</td>
462462
<td style="text-align: center;">09/25/2024</td>
463463
<td><a href="/analysis_test-save-commit0_baseline">Analysis</a></td>
@@ -466,7 +466,7 @@ <h2 id="leaderboard-lite">Leaderboard (lite)</h2>
466466
<tr>
467467
<td>SWE-Agent (subset of <code>all</code>)</td>
468468
<td style="text-align: center;">0</td>
469-
<td style="text-align: center;">330</td>
469+
<td style="text-align: center;">9.10%</td>
470470
<td style="text-align: center;">62.35</td>
471471
<td style="text-align: center;">11/26/2024</td>
472472
<td><a href="/analysis_sweagent-commit0_sweagent">Analysis</a></td>
@@ -480,7 +480,7 @@ <h2 id="leaderboard-all">Leaderboard (all)</h2>
480480
<tr>
481481
<th>Name</th>
482482
<th style="text-align: center;">Repos Resolved (/56)</th>
483-
<th style="text-align: center;">Total Tests Passed (/140926)</th>
483+
<th style="text-align: center;">% Tests Passed (Total: 140926)</th>
484484
<th style="text-align: center;">Test Duration (s)</th>
485485
<th style="text-align: center;">Date</th>
486486
<th>Analysis</th>
@@ -491,7 +491,7 @@ <h2 id="leaderboard-all">Leaderboard (all)</h2>
491491
<tr>
492492
<td>Reference (Gold)</td>
493493
<td style="text-align: center;">19</td>
494-
<td style="text-align: center;">136766</td>
494+
<td style="text-align: center;">97.05%</td>
495495
<td style="text-align: center;">5467.81</td>
496496
<td style="text-align: center;">NA</td>
497497
<td><a href="/analysis_commit0_all_reference">Analysis</a></td>
@@ -500,7 +500,7 @@ <h2 id="leaderboard-all">Leaderboard (all)</h2>
500500
<tr>
501501
<td>OpenHands</td>
502502
<td style="text-align: center;">2</td>
503-
<td style="text-align: center;">3217</td>
503+
<td style="text-align: center;">2.28%</td>
504504
<td style="text-align: center;">408.27</td>
505505
<td style="text-align: center;">11/25/2024</td>
506506
<td><a href="/analysis_openhands-commit0_openhands">Analysis</a></td>
@@ -509,7 +509,7 @@ <h2 id="leaderboard-all">Leaderboard (all)</h2>
509509
<tr>
510510
<td>Claude Sonnet 3.5 - Fill-in</td>
511511
<td style="text-align: center;">0</td>
512-
<td style="text-align: center;">694</td>
512+
<td style="text-align: center;">0.49%</td>
513513
<td style="text-align: center;">629.19</td>
514514
<td style="text-align: center;">09/25/2024</td>
515515
<td><a href="/analysis_commit0-all-plain_fillin">Analysis</a></td>
@@ -518,7 +518,7 @@ <h2 id="leaderboard-all">Leaderboard (all)</h2>
518518
<tr>
519519
<td>SWE-Agent</td>
520520
<td style="text-align: center;">0</td>
521-
<td style="text-align: center;">330</td>
521+
<td style="text-align: center;">0.23%</td>
522522
<td style="text-align: center;">62.35</td>
523523
<td style="text-align: center;">11/26/2024</td>
524524
<td><a href="/analysis_sweagent-commit0_sweagent">Analysis</a></td>

render_submissions.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -164,7 +164,7 @@ def get_blank_repo_metrics(
164164

165165

166166
leaderboard_header = """\n\n## Leaderboard ({split})
167-
| Name | Repos Resolved (/{num_repos}) | Total Tests Passed (/{total_num_tests}) | Test Duration (s) | Date | Analysis | Github |
167+
| Name | Repos Resolved (/{num_repos}) | % Tests Passed (Total: {total_num_tests}) | Test Duration (s) | Date | Analysis | Github |
168168
|------|:-------------------------:|:--------------------:|:--------------------:|:----------:|----|----| """
169169

170170
submission_table_header = """# Submission Name: **{display_name}** (split: {split})
@@ -334,7 +334,7 @@ def render_mds(overwrite_previous, subfolder="docs"):
334334
leaderboard[split].append((cum_tests_passed,
335335
f"\n|{display_name}|"
336336
f"{repos_resolved}|"
337-
f"{cum_tests_passed}|"
337+
f"{100.*cum_tests_passed/split_to_total_tests[split]:.2f}%|"
338338
f"{total_duration:.2f}|"
339339
f"{submission_date}|"
340340
f"{analysis_link}|"
@@ -344,7 +344,7 @@ def render_mds(overwrite_previous, subfolder="docs"):
344344
leaderboard["lite"].append((lite_cum_tests_passed,
345345
f"\n|{display_name} (subset of `all`)|"
346346
f"{lite_repos_resolved}|"
347-
f"{lite_cum_tests_passed}|"
347+
f"{100.*lite_cum_tests_passed/split_to_total_tests['lite']:.2f}%|"
348348
f"{lite_total_duration:.2f}|"
349349
f"{submission_date}|"
350350
f"{analysis_link}|"

search/search_index.json

Lines changed: 1 addition & 1 deletion
Large diffs are not rendered by default.

0 commit comments

Comments (0)