Skip to content

Refactor Result Classes with Unified Data Container and Improved Extensibility #948

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 12 additions & 11 deletions agent/semantic_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from experiment.fuzz_target_error import SemanticCheckResult
from llm_toolkit.prompts import Prompt
from results import AnalysisResult, Result, RunResult
from results_wip import FuzzTargetResult

# Regex for extracting function names.
FUNC_NAME = re.compile(r'(?:^|\s|\b)([\w:]+::)*(\w+)(?:<[^>]*>)?(?=\(|$)')
Expand Down Expand Up @@ -92,7 +93,7 @@ def _parse_libfuzzer_logs(self,
e,
trial=self.trial)
return ParseResult(0, 0, False, '',
SemanticCheckResult(SemanticCheckResult.LOG_MESS_UP))
SemanticCheckResult(FuzzTargetResult.GEN_LOG_MESS_UP))

cov_pcs, total_pcs, crashes = 0, 0, False

Expand Down Expand Up @@ -130,30 +131,30 @@ def _parse_libfuzzer_logs(self,
if symptom == 'null-deref':
return ParseResult(
cov_pcs, total_pcs, True, crash_info,
SemanticCheckResult(SemanticCheckResult.NULL_DEREF, symptom,
SemanticCheckResult(FuzzTargetResult.NON_SEC_CRASH_NULL_DEREF, symptom,
crash_stacks, crash_func))

# Signal, normally indicating assertion failure due to inadequate
# parameter initialization or wrong function usage.
if symptom == 'signal':
return ParseResult(
cov_pcs, total_pcs, True, crash_info,
SemanticCheckResult(SemanticCheckResult.SIGNAL, symptom,
SemanticCheckResult(FuzzTargetResult.NON_SEC_CRASH_SIGNAL, symptom,
crash_stacks, crash_func))

# Exit, normally indicating the fuzz target exited in a controlled manner,
# blocking its bug discovery.
if symptom.endswith('fuzz target exited'):
return ParseResult(
cov_pcs, total_pcs, True, crash_info,
SemanticCheckResult(SemanticCheckResult.EXIT, symptom, crash_stacks,
SemanticCheckResult(FuzzTargetResult.NON_SEC_CRASH_EXIT, symptom, crash_stacks,
crash_func))

# Fuzz target modified constants.
if symptom.endswith('fuzz target overwrites its const input'):
return ParseResult(
cov_pcs, total_pcs, True, crash_info,
SemanticCheckResult(SemanticCheckResult.OVERWRITE_CONST, symptom,
SemanticCheckResult(FuzzTargetResult.GEN_OVERWRITE_CONST, symptom,
crash_stacks, crash_func))

# OOM, normally indicating malloc's parameter is too large, e.g., because
Expand All @@ -163,7 +164,7 @@ def _parse_libfuzzer_logs(self,
if 'out-of-memory' in symptom or 'out of memory' in symptom:
return ParseResult(
cov_pcs, total_pcs, True, crash_info,
SemanticCheckResult(SemanticCheckResult.FP_OOM, symptom,
SemanticCheckResult(FuzzTargetResult.FP_OOM, symptom,
crash_stacks, crash_func))

# FP case 2: fuzz target crashes at init or first few rounds.
Expand All @@ -172,7 +173,7 @@ def _parse_libfuzzer_logs(self,
# This is very likely a false-positive case.
return ParseResult(
cov_pcs, total_pcs, True, crash_info,
SemanticCheckResult(SemanticCheckResult.FP_NEAR_INIT_CRASH, symptom,
SemanticCheckResult(FuzzTargetResult.FP_NEAR_INIT_CRASH, symptom,
crash_stacks, crash_func))

# FP case 3: no func in 1st thread stack belongs to testing proj.
Expand All @@ -183,13 +184,13 @@ def _parse_libfuzzer_logs(self,
if 'LLVMFuzzerTestOneInput' in stack_frame:
return ParseResult(
cov_pcs, total_pcs, True, crash_info,
SemanticCheckResult(SemanticCheckResult.FP_TARGET_CRASH,
SemanticCheckResult(FuzzTargetResult.FP_TARGET_CRASH,
symptom, crash_stacks, crash_func))
break

return ParseResult(
cov_pcs, total_pcs, True, crash_info,
SemanticCheckResult(SemanticCheckResult.NO_SEMANTIC_ERR, symptom,
SemanticCheckResult(FuzzTargetResult.NORM_NO_SEMANTIC_ERR, symptom,
crash_stacks, crash_func))

if check_cov_increase and initcov == donecov and lastround is not None:
Expand All @@ -199,10 +200,10 @@ def _parse_libfuzzer_logs(self,
# all inputs we tried.
return ParseResult(
cov_pcs, total_pcs, False, '',
SemanticCheckResult(SemanticCheckResult.NO_COV_INCREASE))
SemanticCheckResult(FuzzTargetResult.COV_NO_INCREASE))

return ParseResult(cov_pcs, total_pcs, crashes, '',
SemanticCheckResult(SemanticCheckResult.NO_SEMANTIC_ERR))
SemanticCheckResult(FuzzTargetResult.NORM_NO_SEMANTIC_ERR))

def _parse_fuzz_cov_info_from_libfuzzer_logs(
self,
Expand Down
23 changes: 12 additions & 11 deletions experiment/builder_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from experiment import oss_fuzz_checkout, textcov
from experiment.benchmark import Benchmark
from experiment.fuzz_target_error import SemanticCheckResult
from results_wip import FuzzTargetResult
from experiment.workdir import WorkDirs
from llm_toolkit import code_fixer
from llm_toolkit.crash_triager import TriageResult
Expand Down Expand Up @@ -350,7 +351,7 @@ def _parse_libfuzzer_logs(self,
# Some logs from abnormal fuzz targets are too large to be parsed.
logger.error('%s is too large to parse: %s', log_handle.name, e)
return ParseResult(0, 0, False, '',
SemanticCheckResult(SemanticCheckResult.LOG_MESS_UP))
SemanticCheckResult(FuzzTargetResult.GEN_LOG_MESS_UP))

cov_pcs, total_pcs, crashes = 0, 0, False

Expand Down Expand Up @@ -388,30 +389,30 @@ def _parse_libfuzzer_logs(self,
if symptom == 'null-deref':
return ParseResult(
cov_pcs, total_pcs, True, crash_info,
SemanticCheckResult(SemanticCheckResult.NULL_DEREF, symptom,
SemanticCheckResult(FuzzTargetResult.NON_SEC_CRASH_NULL_DEREF, symptom,
crash_stacks, crash_func))

# Signal, normally indicating assertion failure due to inadequate
# parameter initialization or wrong function usage.
if symptom == 'signal':
return ParseResult(
cov_pcs, total_pcs, True, crash_info,
SemanticCheckResult(SemanticCheckResult.SIGNAL, symptom,
SemanticCheckResult(FuzzTargetResult.NON_SEC_CRASH_SIGNAL, symptom,
crash_stacks, crash_func))

# Exit, normally indicating the fuzz target exited in a controlled manner,
# blocking its bug discovery.
if symptom.endswith('fuzz target exited'):
return ParseResult(
cov_pcs, total_pcs, True, crash_info,
SemanticCheckResult(SemanticCheckResult.EXIT, symptom, crash_stacks,
SemanticCheckResult(FuzzTargetResult.NON_SEC_CRASH_EXIT, symptom, crash_stacks,
crash_func))

# Fuzz target modified constants.
if symptom.endswith('fuzz target overwrites its const input'):
return ParseResult(
cov_pcs, total_pcs, True, crash_info,
SemanticCheckResult(SemanticCheckResult.OVERWRITE_CONST, symptom,
SemanticCheckResult(FuzzTargetResult.GEN_OVERWRITE_CONST, symptom,
crash_stacks, crash_func))

# OOM, normally indicating malloc's parameter is too large, e.g., because
Expand All @@ -421,7 +422,7 @@ def _parse_libfuzzer_logs(self,
if 'out-of-memory' in symptom or 'out of memory' in symptom:
return ParseResult(
cov_pcs, total_pcs, True, crash_info,
SemanticCheckResult(SemanticCheckResult.FP_OOM, symptom,
SemanticCheckResult(FuzzTargetResult.FP_OOM, symptom,
crash_stacks, crash_func))

# FP case 2: fuzz target crashes at init or first few rounds.
Expand All @@ -430,7 +431,7 @@ def _parse_libfuzzer_logs(self,
# This is very likely a false-positive case.
return ParseResult(
cov_pcs, total_pcs, True, crash_info,
SemanticCheckResult(SemanticCheckResult.FP_NEAR_INIT_CRASH, symptom,
SemanticCheckResult(FuzzTargetResult.FP_NEAR_INIT_CRASH, symptom,
crash_stacks, crash_func))

# FP case 3: no func in 1st thread stack belongs to testing proj.
Expand All @@ -441,13 +442,13 @@ def _parse_libfuzzer_logs(self,
if 'LLVMFuzzerTestOneInput' in stack_frame:
return ParseResult(
cov_pcs, total_pcs, True, crash_info,
SemanticCheckResult(SemanticCheckResult.FP_TARGET_CRASH,
SemanticCheckResult(FuzzTargetResult.FP_TARGET_CRASH,
symptom, crash_stacks, crash_func))
break

return ParseResult(
cov_pcs, total_pcs, True, crash_info,
SemanticCheckResult(SemanticCheckResult.NO_SEMANTIC_ERR, symptom,
SemanticCheckResult(FuzzTargetResult.NORM_NO_SEMANTIC_ERR, symptom,
crash_stacks, crash_func))

if check_cov_increase and initcov == donecov and lastround is not None:
Expand All @@ -457,10 +458,10 @@ def _parse_libfuzzer_logs(self,
# all inputs we tried.
return ParseResult(
cov_pcs, total_pcs, False, '',
SemanticCheckResult(SemanticCheckResult.NO_COV_INCREASE))
SemanticCheckResult(FuzzTargetResult.COV_NO_INCREASE))

return ParseResult(cov_pcs, total_pcs, crashes, '',
SemanticCheckResult(SemanticCheckResult.NO_SEMANTIC_ERR))
SemanticCheckResult(FuzzTargetResult.NORM_NO_SEMANTIC_ERR))

def build_and_run(
self,
Expand Down
6 changes: 3 additions & 3 deletions experiment/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from experiment import builder_runner, oss_fuzz_checkout, textcov
from experiment.benchmark import Benchmark
from experiment.builder_runner import BuildResult, RunResult
from experiment.fuzz_target_error import SemanticCheckResult
from results_wip import FuzzTargetResult
from experiment.workdir import WorkDirs
from llm_toolkit import code_fixer, corpus_generator, crash_triager
from llm_toolkit.crash_triager import TriageResult
Expand Down Expand Up @@ -520,7 +520,7 @@ def check_target(self, ai_binary, target_path: str) -> Result:
'',
'',
False,
SemanticCheckResult.NOT_APPLICABLE,
FuzzTargetResult.NORM_NOT_APPLICABLE.to_string(),
TriageResult.NOT_APPLICABLE,
compile_error=build_result.log_path,
compile_log=build_result.log_path))
Expand All @@ -541,7 +541,7 @@ def check_target(self, ai_binary, target_path: str) -> Result:
'',
'',
False,
SemanticCheckResult.NOT_APPLICABLE,
FuzzTargetResult.NORM_NOT_APPLICABLE.to_string(),
TriageResult.NOT_APPLICABLE,
compile_error=build_result.log_path,
compile_log=build_result.log_path))
Expand Down
73 changes: 11 additions & 62 deletions experiment/fuzz_target_error.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,25 +18,13 @@
import re
from typing import Optional

from results_wip import FuzzTargetResult

logger = logging.getLogger(__name__)


class SemanticCheckResult:
"""Fuzz target semantic check results."""
NOT_APPLICABLE = '-'
NO_SEMANTIC_ERR = 'NO_SEMANTIC_ERR'
LOG_MESS_UP = 'LOG_MESS_UP'
FP_NEAR_INIT_CRASH = 'FP_NEAR_INIT_CRASH'
FP_TARGET_CRASH = 'FP_TARGET_CRASH'
FP_MEMLEAK = 'FP_MEMLEAK'
FP_OOM = 'FP_OOM'
FP_TIMEOUT = 'FP_TIMEOUT'
NO_COV_INCREASE = 'NO_COV_INCREASE'
NULL_DEREF = 'NULL_DEREF'
SIGNAL = 'SIGNAL'
EXIT = 'EXIT'
OVERWRITE_CONST = 'OVERWRITE_CONST'

# Regex for extracting crash symptoms.
# Matches over 18 types of ASAN error symptoms
# e.g. ERROR: AddressSanitizer: attempting use-after-free on xxx
Expand Down Expand Up @@ -89,11 +77,12 @@ def extract_crash_info(cls, fuzzlog: str) -> str:
return ''

def __init__(self,
err_type: str,
err_type: str | FuzzTargetResult,
crash_symptom: str = '',
crash_stacks: Optional[list[list[str]]] = None,
crash_func: Optional[dict] = None):
self.type = err_type
# Convert string error type to FuzzTargetResult enum if needed
self.type = FuzzTargetResult.from_string(err_type) if isinstance(err_type, str) else err_type
self.crash_symptom = crash_symptom
self.crash_stacks = crash_stacks if crash_stacks else []
self.crash_func = crash_func if crash_func else {}
Expand All @@ -104,54 +93,14 @@ def __repr__(self) -> str:

def _get_error_desc(self) -> str:
"""Returns one sentence error description used in fix prompt."""
if self.type == self.LOG_MESS_UP:
# TODO(happy-qop): Add detailed description for this error type.
return 'Overlong fuzzing log.'
if self.type == self.FP_NEAR_INIT_CRASH:
return (f'Fuzzing crashed immediately at runtime ({self.crash_symptom})'
', indicating fuzz target code for invoking the function under'
' test is incorrect or unrobust.')
if self.type == self.FP_TARGET_CRASH:
return (f'Fuzzing has crashes ({self.crash_symptom}) caused by fuzz '
'target code, indicating its usage for the function under '
'test is incorrect or unrobust.')
if self.type == self.FP_MEMLEAK:
return ('Memory leak detected, indicating some memory was not freed '
'by the fuzz target.')
if self.type == self.FP_OOM:
return ('Out-of-memory error detected, suggesting the fuzz target '
'incorrectly allocates too much memory or has a memory leak.')
if self.type == self.FP_TIMEOUT:
return ('Fuzz target timed out at runtime, indicating its usage for '
'the function under test is incorrect or unrobust.')
if self.type == self.NO_COV_INCREASE:
# TODO(dongge): Append the implementation of the function under test.
return (self.NO_COV_INCREASE_MSG_PREFIX + ', indicating the fuzz target'
' ineffectively invokes the function under test.')
if self.type == self.NULL_DEREF:
return ('Accessing a null pointer, indicating improper parameter '
'initialization or incorrect function usages in the fuzz target.')
if self.type == self.SIGNAL:
return ('Abort with signal, indicating the fuzz target has violated some '
'assertion in the project, likely due to improper parameter '
'initialization or incorrect function usages.')
if self.type == self.EXIT:
return ('Fuzz target exited in a controlled manner without showing any '
'sign of memory corruption, likely due to the fuzz target is not '
'well designed to effectively find memory corruption '
'vulnerability in the function-under-test.')
if self.type == self.OVERWRITE_CONST:
return ('Fuzz target modified a const data. To fix this, ensure that all '
'input data passed to the fuzz target is treated as read-only '
'and not modified. Copy the input data to a separate buffer if '
'any modifications are necessary.')

return ''
return self.type.get_error_desc(self.crash_symptom) if self.type else ''

def _get_error_detail(self) -> list[str]:
"""Returns detailed error description used in fix prompt."""
if self.type not in [
self.FP_NEAR_INIT_CRASH, self.FP_TARGET_CRASH, self.FP_TIMEOUT
FuzzTargetResult.FP_NEAR_INIT_CRASH,
FuzzTargetResult.FP_TARGET_CRASH,
FuzzTargetResult.FP_TIMEOUT
]:
return []

Expand All @@ -166,12 +115,12 @@ def get_error_info(self) -> tuple[str, list[str]]:

@property
def has_err(self) -> bool:
return self.type not in (self.NOT_APPLICABLE, self.NO_SEMANTIC_ERR)
return self.type not in (FuzzTargetResult.NORM_NOT_APPLICABLE, FuzzTargetResult.NORM_NO_SEMANTIC_ERR)

def to_dict(self):
return {
'has_err': self.has_err,
'err_type': self.type,
'err_type': self.type.to_string() if self.type else '',
'crash_symptom': self.crash_symptom,
'crash_stacks': self.crash_stacks,
'crash_func': self.crash_func,
Expand Down
Loading