@@ -401,7 +401,7 @@ <h2 id="leaderboard-lite">Leaderboard (lite)</h2>
401401< tr >
402402< th > Name</ th >
403403< th style ="text-align: center; "> Repos Resolved (/16)</ th >
404- < th style ="text-align: center; "> Total Tests Passed (/ 3628)</ th >
404+ < th style ="text-align: center; "> % Tests Passed (Total: 3628)</ th >
405405< th style ="text-align: center; "> Test Duration (s)</ th >
406406< th style ="text-align: center; "> Date</ th >
407407< th > Analysis</ th >
@@ -412,7 +412,7 @@ <h2 id="leaderboard-lite">Leaderboard (lite)</h2>
412412< tr >
413413< td > Reference (Gold)</ td >
414414< td style ="text-align: center; "> 10</ td >
415- < td style ="text-align: center; "> 3628 </ td >
415+ < td style ="text-align: center; "> 100.00% </ td >
416416< td style ="text-align: center; "> 21.00</ td >
417417< td style ="text-align: center; "> NA</ td >
418418< td > < a href ="/analysis_commit0_lite_reference "> Analysis</ a > </ td >
@@ -421,7 +421,7 @@ <h2 id="leaderboard-lite">Leaderboard (lite)</h2>
421421< tr >
422422< td > OpenHands (subset of < code > all</ code > )</ td >
423423< td style ="text-align: center; "> 2</ td >
424- < td style ="text-align: center; "> 3217 </ td >
424+ < td style ="text-align: center; "> 88.67% </ td >
425425< td style ="text-align: center; "> 408.27</ td >
426426< td style ="text-align: center; "> 11/25/2024</ td >
427427< td > < a href ="/analysis_openhands-commit0_openhands "> Analysis</ a > </ td >
@@ -430,7 +430,7 @@ <h2 id="leaderboard-lite">Leaderboard (lite)</h2>
430430< tr >
431431< td > Claude Sonnet 3.5 - Fill-in (subset of < code > all</ code > )</ td >
432432< td style ="text-align: center; "> 0</ td >
433- < td style ="text-align: center; "> 694 </ td >
433+ < td style ="text-align: center; "> 19.13% </ td >
434434< td style ="text-align: center; "> 629.19</ td >
435435< td style ="text-align: center; "> 09/25/2024</ td >
436436< td > < a href ="/analysis_commit0-all-plain_fillin "> Analysis</ a > </ td >
@@ -439,7 +439,7 @@ <h2 id="leaderboard-lite">Leaderboard (lite)</h2>
439439< tr >
440440< td > Claude Sonnet 3.5 - Fill-in + Unit Test Feedback</ td >
441441< td style ="text-align: center; "> 0</ td >
442- < td style ="text-align: center; "> 619 </ td >
442+ < td style ="text-align: center; "> 17.06% </ td >
443443< td style ="text-align: center; "> 552.79</ td >
444444< td style ="text-align: center; "> 09/25/2024</ td >
445445< td > < a href ="/analysis_commit0-lite-with-test_fillin "> Analysis</ a > </ td >
@@ -448,7 +448,7 @@ <h2 id="leaderboard-lite">Leaderboard (lite)</h2>
448448< tr >
449449< td > Claude Sonnet 3.5 - Fill-in</ td >
450450< td style ="text-align: center; "> 0</ td >
451- < td style ="text-align: center; "> 381 </ td >
451+ < td style ="text-align: center; "> 10.50% </ td >
452452< td style ="text-align: center; "> 22.47</ td >
453453< td style ="text-align: center; "> 09/25/2024</ td >
454454< td > < a href ="/analysis_commit0-lite-plain_fillin "> Analysis</ a > </ td >
@@ -457,7 +457,7 @@ <h2 id="leaderboard-lite">Leaderboard (lite)</h2>
457457< tr >
458458< td > Claude Sonnet 3.5 - Base</ td >
459459< td style ="text-align: center; "> 0</ td >
460- < td style ="text-align: center; "> 376 </ td >
460+ < td style ="text-align: center; "> 10.36% </ td >
461461< td style ="text-align: center; "> 16.83</ td >
462462< td style ="text-align: center; "> 09/25/2024</ td >
463463< td > < a href ="/analysis_test-save-commit0_baseline "> Analysis</ a > </ td >
@@ -466,7 +466,7 @@ <h2 id="leaderboard-lite">Leaderboard (lite)</h2>
466466< tr >
467467< td > SWE-Agent (subset of < code > all</ code > )</ td >
468468< td style ="text-align: center; "> 0</ td >
469- < td style ="text-align: center; "> 330 </ td >
469+ < td style ="text-align: center; "> 9.10% </ td >
470470< td style ="text-align: center; "> 62.35</ td >
471471< td style ="text-align: center; "> 11/26/2024</ td >
472472< td > < a href ="/analysis_sweagent-commit0_sweagent "> Analysis</ a > </ td >
@@ -480,7 +480,7 @@ <h2 id="leaderboard-all">Leaderboard (all)</h2>
480480< tr >
481481< th > Name</ th >
482482< th style ="text-align: center; "> Repos Resolved (/56)</ th >
483- < th style ="text-align: center; "> Total Tests Passed (/ 140926)</ th >
483+ < th style ="text-align: center; "> % Tests Passed (Total: 140926)</ th >
484484< th style ="text-align: center; "> Test Duration (s)</ th >
485485< th style ="text-align: center; "> Date</ th >
486486< th > Analysis</ th >
@@ -491,7 +491,7 @@ <h2 id="leaderboard-all">Leaderboard (all)</h2>
491491< tr >
492492< td > Reference (Gold)</ td >
493493< td style ="text-align: center; "> 19</ td >
494- < td style ="text-align: center; "> 136766 </ td >
494+ < td style ="text-align: center; "> 97.05% </ td >
495495< td style ="text-align: center; "> 5467.81</ td >
496496< td style ="text-align: center; "> NA</ td >
497497< td > < a href ="/analysis_commit0_all_reference "> Analysis</ a > </ td >
@@ -500,7 +500,7 @@ <h2 id="leaderboard-all">Leaderboard (all)</h2>
500500< tr >
501501< td > OpenHands</ td >
502502< td style ="text-align: center; "> 2</ td >
503- < td style ="text-align: center; "> 3217 </ td >
503+ < td style ="text-align: center; "> 2.28% </ td >
504504< td style ="text-align: center; "> 408.27</ td >
505505< td style ="text-align: center; "> 11/25/2024</ td >
506506< td > < a href ="/analysis_openhands-commit0_openhands "> Analysis</ a > </ td >
@@ -509,7 +509,7 @@ <h2 id="leaderboard-all">Leaderboard (all)</h2>
509509< tr >
510510< td > Claude Sonnet 3.5 - Fill-in</ td >
511511< td style ="text-align: center; "> 0</ td >
512- < td style ="text-align: center; "> 694 </ td >
512+ < td style ="text-align: center; "> 0.49% </ td >
513513< td style ="text-align: center; "> 629.19</ td >
514514< td style ="text-align: center; "> 09/25/2024</ td >
515515< td > < a href ="/analysis_commit0-all-plain_fillin "> Analysis</ a > </ td >
@@ -518,7 +518,7 @@ <h2 id="leaderboard-all">Leaderboard (all)</h2>
518518< tr >
519519< td > SWE-Agent</ td >
520520< td style ="text-align: center; "> 0</ td >
521- < td style ="text-align: center; "> 330 </ td >
521+ < td style ="text-align: center; "> 0.23% </ td >
522522< td style ="text-align: center; "> 62.35</ td >
523523< td style ="text-align: center; "> 11/26/2024</ td >
524524< td > < a href ="/analysis_sweagent-commit0_sweagent "> Analysis</ a > </ td >
0 commit comments