Skip to content

Implement Unified Line Coverage Formula #898

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions experiment/coverage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
"""Standard coverage calculation functions to ensure consistency."""

from experiment import textcov


def calculate_coverage(cov: textcov.Textcov, linked_lines: int) -> float:
    """Return the fraction of linked lines covered by `cov`.

    Implements the formula Cov(f) / Linked(f).

    Args:
        cov: Coverage report; only its `covered_lines` count is read.
        linked_lines: Denominator of the ratio (number of linked lines).

    Returns:
        `cov.covered_lines / linked_lines`, or 0.0 when `linked_lines` is
        falsy (e.g. zero), so the division is never attempted.
    """
    # The conditional expression is lazy: `cov` is only touched when the
    # denominator is usable.
    return cov.covered_lines / linked_lines if linked_lines else 0.0


def calculate_coverage_improvement(new_cov: textcov.Textcov,
                                   existing_cov: textcov.Textcov,
                                   union_linked_lines: int) -> float:
    """Return the share of linked lines that only `new_cov` covers.

    Implements [Cov(f1) - Cov(f0)] / [Linked(f1 ∪ f0)].

    Args:
        new_cov: Coverage of the new fuzz target. `None` is tolerated and
            treated as "no coverage", because callers hand over a run
            result's coverage, which is not always present.
        existing_cov: Coverage of the existing fuzz targets; its covered
            lines are subtracted from a copy of `new_cov`.
        union_linked_lines: Denominator of the ratio (size of the union of
            linked lines).

    Returns:
        The number of covered lines left after the subtraction divided by
        `union_linked_lines`, or 0.0 when the denominator is falsy or
        `new_cov` is `None`.
    """
    if not union_linked_lines or new_cov is None:
        return 0.0

    # Subtract on a copy so the caller's `new_cov` is left unmodified.
    diff_cov = new_cov.copy()
    diff_cov.subtract_covered_lines(existing_cov)

    return diff_cov.covered_lines / union_linked_lines
11 changes: 6 additions & 5 deletions experiment/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@

from google.cloud import storage

from experiment import builder_runner, oss_fuzz_checkout, textcov
from experiment import builder_runner
from experiment import coverage as coverage_utils
from experiment import oss_fuzz_checkout, textcov
from experiment.benchmark import Benchmark
from experiment.builder_runner import BuildResult, RunResult
from experiment.fuzz_target_error import SemanticCheckResult
Expand Down Expand Up @@ -461,11 +463,10 @@ def check_target(self, ai_binary, target_path: str) -> Result:
coverage_percent = 0.0

existing_textcov = self.load_existing_textcov()
if run_result.coverage:
run_result.coverage.subtract_covered_lines(existing_textcov)

if total_lines and run_result.coverage:
coverage_diff = run_result.coverage.covered_lines / total_lines
union_linked_lines = max(run_result.coverage.total_lines, total_lines)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I used the term "union" in the mathematical sense, where each line represents an element of a set. Each set can correspond to covered lines or linked lines in the project.

For example, suppose our fuzz target links lines {1,2,3,4} of the project at compile time and covers lines {1,2,3} during fuzzing, while an older fuzz target links lines {1,2,5} and covers lines {2,5}. Then:

  • The union linked lines is {1,2,3,4,5}.
  • The new fuzz target's coverage is 3/5 ({1,2,3} out of {1,2,3,4,5}).
  • The old fuzz target's coverage is 2/5 ({2,5} out of {1,2,3,4,5}).
  • The coverage increase is 2/5 (newly covered lines {1,3} out of {1,2,3,4,5}).

Ideally, the denominator should represent "the total reachable lines" or "the total number of lines" in the project. However, they are difficult to determine accurately if certain files are not linked at compile time.

Given that this is a complicated task, please feel free to prioritize writing and refining your proposal : )
That's the most important factor for your application.
If you have a draft, I am more than happy to provide general feedback to ensure you are on the right track.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are correct, sir. I'll prioritise my proposal first, then come back to this and resolve it completely. Thank you!

coverage_diff = coverage_utils.calculate_coverage_improvement(
run_result.coverage, existing_textcov, union_linked_lines)
else:
dual_logger.log(
f'Warning: total_lines == 0 in {generated_oss_fuzz_project}.')
Expand Down
5 changes: 5 additions & 0 deletions experiment/textcov.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

from __future__ import annotations

import copy
import dataclasses
import json
import logging
Expand Down Expand Up @@ -573,3 +574,7 @@ class name specification use single upper case letter for
next_arg += '[]' * array_count
args.append(next_arg)
return args

def copy(self) -> 'Textcov':
    """Create a deep copy of this Textcov object.

    Returns:
        An independent duplicate of `self`; nested containers are copied
        as well, so mutating the duplicate does not affect the original.
    """
    # Imported locally by function name: this definition is itself called
    # `copy`, so if it is bound at module scope it replaces the module-level
    # `copy` name and `copy.deepcopy` would no longer resolve to the stdlib.
    from copy import deepcopy

    return deepcopy(self)
14 changes: 9 additions & 5 deletions report/aggregate_coverage_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

from google.cloud import storage

from experiment import coverage as coverage_utils
from experiment import evaluator, textcov


Expand All @@ -53,14 +54,17 @@ def compute_coverage_diff(project: str, coverage_links: list[str]):
# TODO: skip other functions defined the target.
new_textcov.merge(textcov.Textcov.from_file(f))

new_textcov.subtract_covered_lines(existing_textcov)
# union of linked lines
try:
total_lines = coverage_summary['data'][0]['totals']['lines']['count']
existing_lines = coverage_summary['data'][0]['totals']['lines']['count']
except KeyError:
total_lines = 1
existing_lines = 0

return new_textcov.covered_lines / total_lines
#print(f'{project}:', new_textcov.covered_lines / total_lines)
union_linked_lines = max(new_textcov.total_lines, existing_lines)

return coverage_utils.calculate_coverage_improvement(new_textcov,
existing_textcov,
union_linked_lines)


def main():
Expand Down
13 changes: 4 additions & 9 deletions run_all_experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import run_one_experiment
from data_prep import introspector
from experiment import benchmark as benchmarklib
from experiment import coverage as coverage_utils
from experiment import evaluator, oss_fuzz_checkout, textcov
from experiment.workdir import WorkDirs
from llm_toolkit import models, prompt_builder
Expand Down Expand Up @@ -502,23 +503,17 @@ def _process_total_coverage_gain() -> dict[str, dict[str, Any]]:

total_existing_lines = sum(lines)
total_cov_covered_lines_before_subtraction = total_cov.covered_lines
total_cov.subtract_covered_lines(existing_textcov)
try:
cov_relative_gain = (total_cov.covered_lines /
existing_textcov.covered_lines)
except ZeroDivisionError:
cov_relative_gain = 0.0

total_lines = max(total_cov.total_lines, total_existing_lines)
union_linked_lines = max(total_cov.total_lines, total_existing_lines)

if total_lines:
coverage_gain[project] = {
'language':
oss_fuzz_checkout.get_project_language(project),
'coverage_diff':
total_cov.covered_lines / total_lines,
'coverage_relative_gain':
cov_relative_gain,
coverage_utils.calculate_coverage_improvement(
total_cov, existing_textcov, union_linked_lines),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: I think we have subtracted existing_textcov above:

total_existing_lines = sum(lines)
total_cov_covered_lines_before_subtraction = total_cov.covered_lines
total_cov.subtract_covered_lines(existing_textcov)

While this action should be idempotent, repeated actions add unnecessary complexity and may confuse readers.

'coverage_ofg_total_covered_lines':
total_cov_covered_lines_before_subtraction,
'coverage_ofg_total_new_covered_lines':
Expand Down
11 changes: 8 additions & 3 deletions stage/execution_stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import os

from experiment import builder_runner as builder_runner_lib
from experiment import coverage as coverage_utils
from experiment import evaluator as evaluator_lib
from experiment.evaluator import Evaluator
from results import BuildResult, Result, RunResult
Expand Down Expand Up @@ -112,16 +113,20 @@ def execute(self, result_history: list[Result]) -> Result:
coverage_percent = 0.0

existing_textcov = evaluator.load_existing_textcov()
run_result.coverage.subtract_covered_lines(existing_textcov)
# Calculate linked lines union using both textcov objects to determine
# the total number of lines that could be covered
union_linked_lines = max(total_lines, existing_textcov.total_lines)

if total_lines:
coverage_diff = run_result.coverage.covered_lines / total_lines
if union_linked_lines:
coverage_diff = coverage_utils.calculate_coverage_improvement(
run_result.coverage, existing_textcov, union_linked_lines)
self.logger.info('coverage diff == %s in %s.', coverage_diff,
generated_oss_fuzz_project)
else:
self.logger.warning('total_lines == 0 in %s',
generated_oss_fuzz_project)
coverage_diff = 0.0

runresult = RunResult(
benchmark=benchmark,
trial=last_result.trial,
Expand Down