-
Notifications
You must be signed in to change notification settings - Fork 192
Fix(experiment): Update coverage calculation logic for #727 #960
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
06ce551
2acf980
3b7ecde
68d41ed
da115a8
02b374a
dad5627
b049b55
dbed9ec
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -49,6 +49,9 @@ class Result: | |
crashes: bool = False | ||
coverage: float = 0.0 | ||
line_coverage_diff: float = 0.0 | ||
newly_covered_lines: int = 0 | ||
total_lines: int = 0 | ||
baseline_total_lines: int = 0 | ||
coverage_report_path: str = '' | ||
reproducer_path: str = '' | ||
# Grammatically correct but has false positive or no cov increase at all. | ||
|
@@ -260,6 +263,20 @@ def __init__(self, runner: builder_runner.BuilderRunner, benchmark: Benchmark, | |
self.builder_runner = runner | ||
self.benchmark = benchmark | ||
self.work_dirs = work_dirs | ||
self.baseline_total_lines = 0 | ||
try: | ||
# Load baseline coverage summary to get total lines. | ||
baseline_summary = load_existing_coverage_summary(self.benchmark.project) | ||
if baseline_summary: | ||
target_basename = os.path.basename(self.benchmark.target_path) | ||
self.baseline_total_lines = \ | ||
compute_total_lines_without_fuzz_targets( | ||
baseline_summary, target_basename) | ||
logger.info('Baseline total lines for %s: %d', self.benchmark.project, | ||
self.baseline_total_lines) | ||
except Exception as e: | ||
logger.error('Failed to load baseline summary/calculate total lines: %s', e) | ||
# Keep baseline_total_lines as 0 | ||
|
||
def build_log_path(self, generated_target_name: str, iteration: int): | ||
return os.path.join(self.work_dirs.run_logs, | ||
|
@@ -429,48 +446,54 @@ def check_target(self, ai_binary, target_path: str) -> Result: | |
run_result = None | ||
|
||
# 2. Calculate coverage percentage and coverage diff | ||
coverage_summary = None | ||
total_lines = 0 | ||
coverage_percent = 0.0 | ||
coverage_diff = 0.0 | ||
if run_result: | ||
# Gets line coverage (diff) details. | ||
coverage_summary = self._load_existing_coverage_summary() | ||
|
||
if self.benchmark.language in ['python', 'jvm'] and run_result.coverage: | ||
# The Jacoco.xml coverage report used to generate summary.json on | ||
# OSS-Fuzz for JVM projects does not trace the source file location. | ||
# Thus the conversion may miss some classes because they are not | ||
# present during coverage report generation. This fix gets the total | ||
# line calculation from the jacoco.xml report of the current run | ||
# directly and compares it with the total_lines retrieved from | ||
# summary.json. Then the larger total_lines is used which is assumed | ||
# to be more accurate. This is the same case for python project which | ||
# the total line is determined from the all_cov.json file. | ||
total_lines = run_result.coverage.total_lines | ||
elif coverage_summary: | ||
total_lines = compute_total_lines_without_fuzz_targets( | ||
coverage_summary, generated_target_name) | ||
else: | ||
total_lines = 0 | ||
|
||
if run_result.total_pcs: | ||
coverage_percent = run_result.cov_pcs / run_result.total_pcs | ||
else: | ||
dual_logger.log( | ||
f'Warning: total_pcs == 0 in {generated_oss_fuzz_project}.') | ||
coverage_percent = 0.0 | ||
newly_covered_lines = 0 | ||
union_total_lines = 0 | ||
current_coverage_copy = None | ||
|
||
existing_textcov = self.load_existing_textcov() | ||
if run_result.coverage: | ||
run_result.coverage.subtract_covered_lines(existing_textcov) | ||
if run_result and run_result.coverage: | ||
current_coverage_copy = run_result.coverage.copy() | ||
|
||
if total_lines and run_result.coverage: | ||
coverage_diff = run_result.coverage.covered_lines / total_lines | ||
else: | ||
dual_logger.log( | ||
f'Warning: total_lines == 0 in {generated_oss_fuzz_project}.') | ||
coverage_diff = 0.0 | ||
if run_result: | ||
existing_textcov = self.load_existing_textcov() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. could you please add a TODO for this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
|
||
if current_coverage_copy: | ||
current_coverage_copy.merge(existing_textcov) | ||
union_total_lines = current_coverage_copy.total_lines | ||
else: | ||
union_total_lines = existing_textcov.total_lines | ||
|
||
if run_result.coverage: | ||
run_result.coverage.subtract_covered_lines(existing_textcov) | ||
newly_covered_lines = run_result.coverage.covered_lines | ||
else: | ||
newly_covered_lines = 0 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I recon the conditions of these two blocks ( There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes we can merge, good point, thanks |
||
|
||
if union_total_lines > 0: | ||
coverage_diff = newly_covered_lines / union_total_lines | ||
else: | ||
if newly_covered_lines > 0: | ||
dual_logger.log( | ||
f'Warning: union_total_lines is 0 but newly_covered_lines is {newly_covered_lines}. Cannot calculate coverage diff accurately.' | ||
) | ||
coverage_diff = 0.0 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As this part is getting more complex, could you please separate it into an individual function? |
||
|
||
total_lines_for_percent = 0 | ||
if self.benchmark.language in ['python', 'jvm'] and run_result.coverage: | ||
if current_coverage_copy: | ||
total_lines_for_percent = current_coverage_copy.total_lines | ||
elif self._load_existing_coverage_summary(): | ||
coverage_summary = self._load_existing_coverage_summary() | ||
total_lines_for_percent = compute_total_lines_without_fuzz_targets( | ||
coverage_summary, generated_target_name) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why do we need to modify the logic here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As per my understanding this logic ensures that the overall coverage_percent accurately reflects the coverage achieved by the current fuzz target using its own relevant lines as the denominator, distinct from the coverage_diff which uses the union of lines. It correctly uses the pre-subtraction coverage data stored in current_coverage_copy for this calculation. That is my understanding of this please correct me if i i am wrong There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
This makes sense, but:
if run_result:
# Gets line coverage (diff) details.
coverage_summary = self._load_existing_coverage_summary()
...
elif coverage_summary:
total_lines = compute_total_lines_without_fuzz_targets(
coverage_summary, generated_target_name)
elif coverage_summary:
total_lines = compute_total_lines_without_fuzz_targets(
coverage_summary, generated_target_name)
else:
total_lines = 0
|
||
|
||
if run_result.total_pcs: | ||
coverage_percent = run_result.cov_pcs / run_result.total_pcs | ||
else: | ||
dual_logger.log( | ||
f'Warning: Could not determine total lines for percentage calculation in {generated_oss_fuzz_project}.') | ||
coverage_percent = 0.0 | ||
|
||
if self.benchmark.language == 'jvm': | ||
# For JVM, the generation is consider success if either is true | ||
|
@@ -517,6 +540,9 @@ def check_target(self, ai_binary, target_path: str) -> Result: | |
False, | ||
0.0, | ||
0.0, | ||
0, | ||
0, | ||
0, | ||
'', | ||
'', | ||
False, | ||
|
@@ -538,6 +564,9 @@ def check_target(self, ai_binary, target_path: str) -> Result: | |
False, | ||
0.0, | ||
0.0, | ||
0, | ||
0, | ||
0, | ||
'', | ||
'', | ||
False, | ||
|
@@ -567,26 +596,34 @@ def check_target(self, ai_binary, target_path: str) -> Result: | |
run_result.crashes, | ||
0.0, | ||
0.0, | ||
0, | ||
0, | ||
0, | ||
'', | ||
'', | ||
not run_result.succeeded, | ||
run_result.semantic_check.type, | ||
run_result.triage, | ||
textcov.Textcov(), | ||
compile_error=build_result.log_path, | ||
compile_log=build_result.log_path)) | ||
|
||
dual_logger.log( | ||
f'Result for {generated_oss_fuzz_project}: ' | ||
f'crashes={run_result.crashes}, coverage={coverage_percent} ' | ||
f'crashes={run_result.crashes}, coverage={coverage_percent:.4f} ' | ||
f'({run_result.cov_pcs}/{run_result.total_pcs}), ' | ||
f'coverage diff={coverage_diff} ' | ||
f'({run_result.coverage.covered_lines}/{total_lines})') | ||
f'newly covered lines={newly_covered_lines}, union total lines={union_total_lines}, baseline total lines={self.baseline_total_lines}, ' | ||
f'coverage diff={coverage_diff:.4f}' | ||
) | ||
return dual_logger.return_result( | ||
Result(False, | ||
True, | ||
run_result.crashes, | ||
coverage_percent, | ||
coverage_diff, | ||
newly_covered_lines, | ||
union_total_lines, | ||
self.baseline_total_lines, | ||
run_result.coverage_report_path, | ||
run_result.reproducer_path, | ||
not run_result.succeeded, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
IIUC, this will only create a shallow copy, the modification below will affect the original
run_result.coverage
.Try using python's builtin
deepcopy
package.We can add a function in class
Textcov
for this.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The line current_coverage_copy = run_result.coverage.copy() in evaluator.py should therefore already be performing a deep copy, preventing unintended modifications to the original run_result.coverage object. I think it will do deep copy please correct me if i am wrong
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@demoncoder-crypto Could you confirmed this via a simple script?
The
coverage
is aTextcov
, which is a dataclass and does not have builtin function copy: