Skip to content

Commit 7673dbb

Browse files
authored
Merge pull request #224 from Tagl/pac-tle-bugfix
PAC incorrect TLE verdict bugfix
2 parents 459848a + f5ff2b5 commit 7673dbb

File tree

1 file changed: +76 additions, -56 deletions

problemtools/verifyproblem.py

Lines changed: 76 additions & 56 deletions
Original file line number | Diff line number | Diff line change
@@ -214,25 +214,26 @@ def _check_symlinks(self):
214214
return False
215215
return True
216216

217-
def run_submission(self, sub, args, timelim_low, timelim_high):
218-
res1, res2, reused = self._run_submission_real(sub, args, timelim_low, timelim_high)
219-
res1 = self._init_result_for_testcase(res1)
220-
res2 = self._init_result_for_testcase(res2)
217+
def run_submission(self, sub, args, timelim, timelim_low, timelim_high):
218+
res, res_low, res_high, reused = self._run_submission_real(sub, args, timelim, timelim_low, timelim_high)
219+
res = self._init_result_for_testcase(res)
220+
res_low = self._init_result_for_testcase(res_low)
221+
res_high = self._init_result_for_testcase(res_high)
221222
msg = "Reused test file result" if reused else "Test file result"
222-
self.info('%s: %s' % (msg, res1))
223-
if res1.verdict != 'AC' and self.is_in_sample_group():
224-
res1.sample_failures.append(res1)
223+
self.info('%s: %s' % (msg, res))
224+
if res.verdict != 'AC' and self.is_in_sample_group():
225+
res.sample_failures.append(res)
225226

226-
return (res1, res2)
227+
return (res, res_low, res_high)
227228

228-
def _run_submission_real(self, sub, args, timelim_low, timelim_high):
229+
def _run_submission_real(self, sub, args, timelim, timelim_low, timelim_high):
229230
if self.reuse_result_from is not None:
230-
return self.reuse_result_from._run_submission_real(sub, args, timelim_low, timelim_high)
231+
return self.reuse_result_from._run_submission_real(sub, args, timelim, timelim_low, timelim_high)
231232

232-
cache_key = (sub, args, timelim_low, timelim_high)
233+
cache_key = (sub, args, timelim, timelim_low, timelim_high)
233234
if self._result_cache[0] == cache_key:
234-
res1, res2 = self._result_cache[1]
235-
return (res1, res2, True)
235+
res, res_low, res_high = self._result_cache[1]
236+
return (res, res_low, res_high, True)
236237

237238
outfile = os.path.join(self._problem.tmpdir, 'output')
238239
if sys.stdout.isatty():
@@ -241,34 +242,43 @@ def _run_submission_real(self, sub, args, timelim_low, timelim_high):
241242
sys.stdout.flush()
242243

243244
if self._problem.is_interactive:
244-
res2 = self._problem.output_validators.validate_interactive(self, sub, timelim_high, self._problem.submissions)
245+
res_high = self._problem.output_validators.validate_interactive(self, sub, timelim_high, self._problem.submissions)
245246
else:
246247
status, runtime = sub.run(self.infile, outfile,
247248
timelim=timelim_high+1,
248249
memlim=self._problem.config.get('limits')['memory'])
249250
if is_TLE(status) or runtime > timelim_high:
250-
res2 = SubmissionResult('TLE')
251+
res_high = SubmissionResult('TLE')
251252
elif is_RTE(status):
252-
res2 = SubmissionResult('RTE')
253+
res_high = SubmissionResult('RTE')
253254
else:
254-
res2 = self._problem.output_validators.validate(self, outfile)
255-
res2.runtime = runtime
255+
res_high = self._problem.output_validators.validate(self, outfile)
256+
res_high.runtime = runtime
256257
if sys.stdout.isatty():
257258
sys.stdout.write('%s' % '\b \b' * (len(msg)))
258-
if res2.runtime <= timelim_low:
259-
res1 = res2
260-
elif res2.validator_first and res2.verdict == 'WA':
259+
if res_high.runtime <= timelim_low:
260+
res_low = res_high
261+
res = res_high
262+
elif res_high.runtime <= timelim:
263+
res_low = SubmissionResult('TLE')
264+
res = res_high
265+
elif res_high.validator_first and res_high.verdict == 'WA':
261266
# WA can override TLE for interactive problems (see comment in validate_interactive).
262-
res1 = SubmissionResult('WA')
263-
res1.validator_first = True
264-
res2.runtime = timelim_low
267+
res = SubmissionResult('WA')
268+
res.validator_first = True
269+
res_low = res
270+
res_high.runtime = timelim_low
265271
else:
266-
res1 = SubmissionResult('TLE')
267-
res1.runtime = res2.runtime
268-
res1.set_ac_runtime()
269-
res2.set_ac_runtime()
270-
self._result_cache = (cache_key, (res1, res2))
271-
return (res1, res2, False)
272+
res_low = SubmissionResult('TLE')
273+
res = res_low
274+
275+
res.runtime = res_high.runtime
276+
res_low.runtime = res_high.runtime
277+
res.set_ac_runtime()
278+
res_low.set_ac_runtime()
279+
res_high.set_ac_runtime()
280+
self._result_cache = (cache_key, (res, res_low, res_high))
281+
return (res, res_low, res_high, False)
272282

273283
def _init_result_for_testcase(self, res):
274284
res = copy.copy(res)
@@ -527,22 +537,31 @@ def parse_num(s, i):
527537
return self._check_res
528538

529539

530-
def run_submission(self, sub, args, timelim_low, timelim_high):
540+
def run_submission(self, sub, args, timelim, timelim_low, timelim_high):
531541
self.info('Running on %s' % self)
532-
subres1 = []
533-
subres2 = []
542+
subres = []
543+
subres_low = []
544+
subres_high = []
545+
active_low, active = True, True
534546
on_reject = self.config['on_reject']
535547
for child in self._items:
536548
if not child.matches_filter(args.data_filter):
537549
continue
538-
r1, r2 = child.run_submission(sub, args, timelim_low, timelim_high)
539-
subres1.append(r1)
540-
subres2.append(r2)
541-
if on_reject == 'break' and r2.verdict != 'AC':
542-
break
550+
res, res_low, res_high = child.run_submission(sub, args, timelim, timelim_low, timelim_high)
551+
subres_high.append(res_high)
552+
if active:
553+
subres.append(res)
554+
if active_low:
555+
subres_low.append(res_low)
556+
if on_reject == 'break':
557+
active_low &= res_low.verdict == 'AC'
558+
active &= res.verdict == 'AC'
559+
if res_high.verdict != 'AC':
560+
break
543561

544-
return (self.aggregate_results(sub, subres1),
545-
self.aggregate_results(sub, subres2, shadow_result=True))
562+
return (self.aggregate_results(sub, subres),
563+
self.aggregate_results(sub, subres_low, shadow_result=True),
564+
self.aggregate_results(sub, subres_high, shadow_result=True))
546565

547566

548567
def aggregate_results(self, sub, sub_results, shadow_result=False):
@@ -1591,33 +1610,34 @@ def check_submission(self, sub, args, expected_verdict, timelim, timelim_low, ti
15911610
# to make sure we have margin in both directions.
15921611
expected_verdict = 'AC'
15931612
partial = True
1594-
timelim = timelim_low
1613+
else:
1614+
timelim_low = timelim
15951615

1596-
result1, result2 = self._problem.testdata.run_submission(sub, args, timelim, timelim_high)
1616+
result, result_low, result_high = self._problem.testdata.run_submission(sub, args, timelim, timelim_low, timelim_high)
15971617

1598-
if result1.verdict == 'AC' and expected_verdict == 'AC' and not partial and result1.sample_failures:
1599-
res = result1.sample_failures[0]
1618+
if result.verdict == 'AC' and expected_verdict == 'AC' and not partial and result.sample_failures:
1619+
res = result.sample_failures[0]
16001620
self.warning('%s got %s on sample: %s' % (desc, res.verdict, res))
16011621

1602-
if result1.verdict != result2.verdict or result1.score != result2.score:
1603-
r1, r2 = (result1, result2) if result1.verdict == result2.verdict else (result1.verdict, result2.verdict)
1604-
self.warning('%s sensitive to time limit: limit of %s secs -> %s, limit of %s secs -> %s' % (desc, timelim, r1, timelim_high, r2))
1622+
if result_low.verdict != result_high.verdict or result_low.score != result_high.score:
1623+
r1, r2 = (result_low, result_high) if result_low.verdict == result_high.verdict else (result_low.verdict, result_high.verdict)
1624+
self.warning('%s sensitive to time limit: limit of %s secs -> %s, limit of %s secs -> %s' % (desc, timelim_low, r1, timelim_high, r2))
16051625

1606-
if partial and self.fully_accepted(result1):
1607-
self.warning('%s got %s' % (desc, result1))
1608-
elif result1.verdict == expected_verdict:
1609-
self.msg(' %s OK: %s' % (desc, result1))
1626+
if partial and self.fully_accepted(result):
1627+
self.warning('%s got %s' % (desc, result))
1628+
elif result.verdict == expected_verdict:
1629+
self.msg(' %s OK: %s' % (desc, result))
16101630
if (expected_verdict == 'AC' and not partial
1611-
and not self.fully_accepted(result1)
1631+
and not self.fully_accepted(result)
16121632
and self.full_score_finite()):
16131633
# For some heuristic problems, this is expected. Thus, only warn.
16141634
self.warning('%s did not attain full score (consider moving it to partially_accepted)' % desc)
1615-
elif result2.verdict == expected_verdict and not (partial and self.fully_accepted(result2)):
1616-
self.msg(' %s OK with extra time: %s' % (desc, result2))
1635+
elif result_high.verdict == expected_verdict and not (partial and self.fully_accepted(result_high)):
1636+
self.msg(' %s OK with extra time: %s' % (desc, result_high))
16171637
else:
1618-
self.error('%s got %s' % (desc, result1), result2.additional_info)
1638+
self.error('%s got %s' % (desc, result), result_high.additional_info)
16191639

1620-
return result1
1640+
return result
16211641

16221642
def full_score_finite(self):
16231643
min_score, max_score = self._problem.testdata.get_score_range()

0 commit comments

Comments
 (0)