diff --git a/agent/base_agent.py b/agent/base_agent.py
index f4060b075..6a083350f 100644
--- a/agent/base_agent.py
+++ b/agent/base_agent.py
@@ -14,6 +14,7 @@
"""The abstract base class for LLM agents in stages."""
import argparse
import asyncio
+import json
import os
import random
import re
@@ -370,6 +371,20 @@ def __init__(self,
logger.info('ADK Agent %s created.', self.name, trial=self.trial)
+ def get_xml_representation(self, response: Optional[dict]) -> str:
+ """Returns the XML representation of the response."""
+ if not response:
+ return ''
+ # If the response is not a dict, return it as a string.
+ if not isinstance(response, dict):
+ return str(response)
+ # Wrap each key/value pair of the dict in XML-style tags.
+ xml_rep = []
+ for key, value in response.items():
+ xml_obj = f'<{key}>\n{value}\n</{key}>'
+ xml_rep.append(xml_obj)
+ return '\n'.join(xml_rep)
+
def chat_llm(self, cur_round: int, client: Any, prompt: Prompt,
trial: int) -> Any:
"""Call the agent with the given prompt, running async code in sync."""
@@ -398,10 +413,14 @@ async def _call():
self.log_llm_response(final_response)
elif event.content.parts[0].function_response:
final_response = event.content.parts[0].function_response.response
+ self.log_llm_response(self.get_xml_representation(final_response))
elif event.actions and event.actions.escalate:
error_message = event.error_message
logger.error('Agent escalated: %s', error_message, trial=self.trial)
+ if not final_response:
+ self.log_llm_response('No valid response from LLM.')
+
return final_response
return self.llm.with_retry_on_error(lambda: asyncio.run(_call()),
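For illustration, a minimal, self-contained sketch (not part of the diff) of what the fixed get_xml_representation produces for a dict-shaped function response; the keys and values below are invented:

    # Hypothetical function_response dict; real responses come from the ADK tool call.
    response = {'feasible': False, 'analysis': 'All call sites validate the input size.'}

    xml_rep = []
    for key, value in response.items():
        # Each key/value pair becomes an XML-style element.
        xml_rep.append(f'<{key}>\n{value}\n</{key}>')

    print('\n'.join(xml_rep))
    # <feasible>
    # False
    # </feasible>
    # <analysis>
    # All call sites validate the input size.
    # </analysis>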
diff --git a/agent/context_analyzer.py b/agent/context_analyzer.py
index 2d7beb6dd..ea7813b25 100644
--- a/agent/context_analyzer.py
+++ b/agent/context_analyzer.py
@@ -73,13 +73,18 @@ def execute(self,
last_result = result_history[-1]
- if not isinstance(
- last_result, resultslib.AnalysisResult) or not last_result.crash_result:
- logger.error(f'Expected last result to be AnalysisResult, got %s.',
+ # Validate that the last result is an AnalysisResult and has a valid crash_result
+ if not isinstance(last_result, resultslib.AnalysisResult):
+ logger.error('Expected last result to be AnalysisResult, got %s.',
type(last_result),
trial=self.trial)
return last_result
+ if not last_result.crash_result:
+ logger.error('Missing crash_result in the AnalysisResult.',
+ trial=self.trial)
+ return last_result
+
context_result = None
# Initialize the ProjectContainerTool for local file search
@@ -99,6 +104,10 @@ def execute(self,
trial=result_history[-1].trial)
context_result = resultslib.CrashContextResult.from_dict(final_response)
if context_result:
+ logger.info(
'Context analyzer result consistent with crash analysis: %s',
+ str(context_result.feasible == last_result.crash_result.true_bug),
+ trial=self.trial)
break
logger.error('Failed to parse LLM response into CrashContextResult.',
trial=self.trial)
@@ -239,7 +248,7 @@ def get_function_implementation(self, project_name: str,
return response
def report_final_result(self, feasible: bool, analysis: str,
- recommendations: str,
+ source_code_evidence: str, recommendations: str,
tool_context: ToolContext) -> dict:
"""
Provide final result, including the crash feasibility,
@@ -247,8 +256,10 @@ def report_final_result(self, feasible: bool, analysis: str,
Args:
feasible (bool): True if the crash is feasible, False otherwise.
- analysis (str): Detailed analysis and source code evidence showing
+ analysis (str): Detailed analysis showing
why the crash is or is not feasible.
+ source_code_evidence (str): Source code evidence supporting the analysis.
+ This MUST show the constraints on input variables and why they make the crash feasible or not feasible.
recommendations (str): Recommendations for modifying the fuzz target to
prevent the crash. If the crash is feasible,
this should be empty.
@@ -256,14 +267,11 @@ def report_final_result(self, feasible: bool, analysis: str,
Returns:
This function will not return anything to the LLM.
"""
- response = f"""
- \n{feasible}\n
- \n{analysis}\n
- \n{recommendations}\n
- """
- self.log_llm_response(response)
crash_context_result = resultslib.CrashContextResult(
- feasible=feasible, analysis=analysis, recommendations=recommendations)
+ feasible=feasible,
+ analysis=analysis,
+ source_code_evidence=source_code_evidence,
+ recommendations=recommendations)
# We have received final result. Instruct the agent to terminate execution.
# tool_context._invocation_context.end_invocation = True
diff --git a/agent/function_analyzer.py b/agent/function_analyzer.py
index 8de800b03..6379db00f 100644
--- a/agent/function_analyzer.py
+++ b/agent/function_analyzer.py
@@ -106,15 +106,24 @@ def execute(self,
# Call the agent asynchronously and return the result
prompt = self._initial_prompt(result_history)
- final_response_text = self.chat_llm(self.round,
- client=None,
- prompt=prompt,
- trial=result_history[-1].trial)
-
- self.handle_llm_response(final_response_text, result)
+ while self.round < self.max_round:
+ final_response_text = self.chat_llm(self.round,
+ client=None,
+ prompt=prompt,
+ trial=result_history[-1].trial)
+ if final_response_text:
+ self.handle_llm_response(final_response_text, result)
+ break
+
+ # Handle invalid LLM response
+ template_builder = prompt_builder.FunctionAnalyzerTemplateBuilder(
+ self.llm, self.benchmark)
+
+ prompt = self._container_handle_invalid_tool_usage(
+ [self.inspect_tool], self.round, final_response_text,
+ template_builder.build(), template_builder.get_response_format())
self.inspect_tool.terminate()
-
return result
def _initial_prompt(
@@ -126,7 +135,7 @@ def _initial_prompt(
builder = prompt_builder.FunctionAnalyzerTemplateBuilder(
self.llm, self.benchmark)
- prompt = builder.build_prompt()
+ prompt = builder.build_prompt(self.inspect_tool.project_dir)
prompt.append(self.inspect_tool.tutorial())
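The reworked execute() above retries the LLM call until it yields a non-empty response or max_round is reached. A stripped-down sketch of that retry pattern, with the LLM call and prompt rebuilding passed in as callables (placeholder names, not the real agent API):

    from typing import Callable

    def query_with_retries(call_llm: Callable[[str], str],
                           rebuild_prompt: Callable[[str], str],
                           prompt: str,
                           max_round: int) -> str:
        """Retry until the model returns a usable response or rounds run out."""
        response = ''
        for _ in range(max_round):
            response = call_llm(prompt)
            if response:
                break
            # Empty response: rebuild the prompt with corrective guidance and retry.
            prompt = rebuild_prompt(prompt)
        return response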
diff --git a/benchmark-sets/analyzer-tests-1/astc-encoder.yaml b/benchmark-sets/analyzer-tests-1/astc-encoder.yaml
new file mode 100644
index 000000000..ae83570c1
--- /dev/null
+++ b/benchmark-sets/analyzer-tests-1/astc-encoder.yaml
@@ -0,0 +1,15 @@
+"functions":
+- "name": "_Z20symbolic_to_physicalRK21block_size_descriptorRK25symbolic_compressed_blockPh"
+ "params":
+ - "name": "bsd"
+ "type": "bool "
+ - "name": "scb"
+ "type": "bool "
+ - "name": "pcb"
+ "type": "bool "
+ "return_type": "void"
+ "signature": "void symbolic_to_physical(const struct block_size_descriptor &, const struct symbolic_compressed_block &, uint8_t *)"
+"language": "c++"
+"project": "astc-encoder"
+"target_name": "fuzz_astc_physical_to_symbolic"
+"target_path": "/src/astc-encoder/Source/Fuzzers/fuzz_astc_physical_to_symbolic.cpp"
diff --git a/benchmark-sets/analyzer-tests-1/bind9.yaml b/benchmark-sets/analyzer-tests-1/bind9.yaml
new file mode 100644
index 000000000..4a6d55559
--- /dev/null
+++ b/benchmark-sets/analyzer-tests-1/bind9.yaml
@@ -0,0 +1,17 @@
+"functions":
+- "name": "dns_zt_asyncload"
+ "params":
+ - "name": "zt"
+ "type": "bool "
+ - "name": "newonly"
+ "type": "bool"
+ - "name": "loaddone"
+ "type": "bool "
+ - "name": "arg"
+ "type": "bool "
+ "return_type": "int"
+ "signature": "isc_result_t dns_zt_asyncload(dns_zt_t *, bool, dns_zt_callback_t *, void *)"
+"language": "c"
+"project": "bind9"
+"target_name": "isc_lex_gettoken_fuzzer"
+"target_path": "/src/bind9/fuzz/isc_lex_gettoken.c"
diff --git a/benchmark-sets/analyzer-tests-1/bluez.yaml b/benchmark-sets/analyzer-tests-1/bluez.yaml
new file mode 100644
index 000000000..22ae588ca
--- /dev/null
+++ b/benchmark-sets/analyzer-tests-1/bluez.yaml
@@ -0,0 +1,21 @@
+"functions":
+- "name": "g_obex_get_req"
+ "params":
+ - "name": "obex"
+ "type": "bool "
+ - "name": "data_func"
+ "type": "bool "
+ - "name": "complete_func"
+ "type": "bool "
+ - "name": "user_data"
+ "type": "bool "
+ - "name": "err"
+ "type": "bool "
+ - "name": "first_hdr_id"
+ "type": "int"
+ "return_type": "int"
+ "signature": "guint g_obex_get_req(GObex *, GObexDataConsumer, GObexFunc, gpointer, GError **, guint, void)"
+"language": "c"
+"project": "bluez"
+"target_name": "fuzz_textfile"
+"target_path": "/src/fuzz_textfile.c"
diff --git a/benchmark-sets/analyzer-tests-1/clamav.yaml b/benchmark-sets/analyzer-tests-1/clamav.yaml
new file mode 100644
index 000000000..4ed682416
--- /dev/null
+++ b/benchmark-sets/analyzer-tests-1/clamav.yaml
@@ -0,0 +1,25 @@
+"functions":
+- "name": "Lzma2Decode"
+ "params":
+ - "name": ""
+ "type": "bool "
+ - "name": ""
+ "type": "bool "
+ - "name": ""
+ "type": "bool "
+ - "name": ""
+ "type": "bool "
+ - "name": ""
+ "type": "char"
+ - "name": ""
+ "type": "int"
+ - "name": ""
+ "type": "bool "
+ - "name": ""
+ "type": "bool "
+ "return_type": "int"
+ "signature": "SRes Lzma2Decode(Byte *, SizeT *, const Byte *, SizeT *, Byte, ELzmaFinishMode, ELzmaStatus *, ISzAlloc *)"
+"language": "c++"
+"project": "clamav"
+"target_name": "clamav_scanfile_HWP3_fuzzer"
+"target_path": "/src/clamav/fuzz/clamav_scanfile_fuzzer.cpp"
diff --git a/benchmark-sets/analyzer-tests-1/exiv2.yaml b/benchmark-sets/analyzer-tests-1/exiv2.yaml
new file mode 100644
index 000000000..3382b26a5
--- /dev/null
+++ b/benchmark-sets/analyzer-tests-1/exiv2.yaml
@@ -0,0 +1,15 @@
+"functions":
+- "name": "_ZNK12_GLOBAL__N_113TiffThumbnail4copyERKN5Exiv28ExifDataE"
+ "params":
+ - "name": ""
+ "type": "bool "
+ - "name": ""
+ "type": "bool "
+ - "name": ""
+ "type": "bool "
+ "return_type": "void"
+ "signature": "struct DataBuf (anonymous namespace)::TiffThumbnail::copy(const ExifData &)"
+"language": "c++"
+"project": "exiv2"
+"target_name": "fuzz-read-print-write"
+"target_path": "/src/exiv2/fuzz/fuzz-read-print-write.cpp"
diff --git a/benchmark-sets/analyzer-tests-1/ibmswtpm2.yaml b/benchmark-sets/analyzer-tests-1/ibmswtpm2.yaml
new file mode 100644
index 000000000..8e0b4074f
--- /dev/null
+++ b/benchmark-sets/analyzer-tests-1/ibmswtpm2.yaml
@@ -0,0 +1,13 @@
+"functions":
+- "name": "TPM2_Create"
+ "params":
+ - "name": "in"
+ "type": "bool "
+ - "name": "out"
+ "type": "bool "
+ "return_type": "int"
+ "signature": "TPM_RC TPM2_Create(Create_In *, Create_Out *)"
+"language": "c++"
+"project": "ibmswtpm2"
+"target_name": "fuzz_tpm_server"
+"target_path": "/src/ibmswtpm2/src/fuzzer.cc"
diff --git a/benchmark-sets/analyzer-tests-1/libfuse.yaml b/benchmark-sets/analyzer-tests-1/libfuse.yaml
new file mode 100644
index 000000000..80f2064fb
--- /dev/null
+++ b/benchmark-sets/analyzer-tests-1/libfuse.yaml
@@ -0,0 +1,11 @@
+"functions":
+- "name": "af_gb_alloc_data"
+ "params":
+ - "name": "len"
+ "type": "size_t"
+ "return_type": "void"
+ "signature": "char * af_gb_alloc_data(size_t)"
+"language": "c"
+"project": "libfuse"
+"target_name": "fuzz_optparse"
+"target_path": "/src/fuzz_optparse.c"
diff --git a/benchmark-sets/analyzer-tests-1/liblouis.yaml b/benchmark-sets/analyzer-tests-1/liblouis.yaml
new file mode 100644
index 000000000..f080d1e48
--- /dev/null
+++ b/benchmark-sets/analyzer-tests-1/liblouis.yaml
@@ -0,0 +1,13 @@
+"functions":
+- "name": "lou_getTypeformForEmphClass"
+ "params":
+ - "name": "tableList"
+ "type": "bool "
+ - "name": "emphClass"
+ "type": "bool "
+ "return_type": "short"
+ "signature": "formtype lou_getTypeformForEmphClass(const char *, const char *)"
+"language": "c"
+"project": "liblouis"
+"target_name": "table_fuzzer"
+"target_path": "/src/liblouis/tests/fuzzing/table_fuzzer.cc"
diff --git a/benchmark-sets/analyzer-tests-1/libraw.yaml b/benchmark-sets/analyzer-tests-1/libraw.yaml
new file mode 100644
index 000000000..b2c8dc21e
--- /dev/null
+++ b/benchmark-sets/analyzer-tests-1/libraw.yaml
@@ -0,0 +1,15 @@
+"functions":
+- "name": "_ZN6LibRaw17crxLoadDecodeLoopEPvi"
+ "params":
+ - "name": "this"
+ "type": "bool "
+ - "name": "img"
+ "type": "bool "
+ - "name": "nPlanes"
+ "type": "int"
+ "return_type": "void"
+ "signature": "void LibRaw::crxLoadDecodeLoop(void *, int)"
+"language": "c++"
+"project": "libraw"
+"target_name": "libraw_cr2_fuzzer"
+"target_path": "/src/libraw_fuzzer.cc"
diff --git a/benchmark-sets/analyzer-tests-1/libsndfile.yaml b/benchmark-sets/analyzer-tests-1/libsndfile.yaml
new file mode 100644
index 000000000..6c6127bb5
--- /dev/null
+++ b/benchmark-sets/analyzer-tests-1/libsndfile.yaml
@@ -0,0 +1,15 @@
+"functions":
+- "name": "sf_open"
+ "params":
+ - "name": "path"
+ "type": "bool "
+ - "name": "mode"
+ "type": "int"
+ - "name": "sfinfo"
+ "type": "bool "
+ "return_type": "void"
+ "signature": "SNDFILE * sf_open(const char *, int, SF_INFO *)"
+"language": "c"
+"project": "libsndfile"
+"target_name": "sndfile_fuzzer"
+"target_path": "/src/libsndfile/ossfuzz/sndfile_fuzzer.cc"
diff --git a/benchmark-sets/analyzer-tests-1/libsodium.yaml b/benchmark-sets/analyzer-tests-1/libsodium.yaml
new file mode 100644
index 000000000..3c4e00a47
--- /dev/null
+++ b/benchmark-sets/analyzer-tests-1/libsodium.yaml
@@ -0,0 +1,13 @@
+"functions":
+- "name": "argon2_initialize"
+ "params":
+ - "name": "instance"
+ "type": "bool "
+ - "name": "context"
+ "type": "bool "
+ "return_type": "int"
+ "signature": "int argon2_initialize(argon2_instance_t *, argon2_context *)"
+"language": "c++"
+"project": "libsodium"
+"target_name": "secret_key_auth_fuzzer"
+"target_path": "/src/secret_key_auth_fuzzer.cc"
diff --git a/benchmark-sets/analyzer-tests-1/mosh.yaml b/benchmark-sets/analyzer-tests-1/mosh.yaml
new file mode 100644
index 000000000..727b10487
--- /dev/null
+++ b/benchmark-sets/analyzer-tests-1/mosh.yaml
@@ -0,0 +1,15 @@
+"functions":
+- "name": "_ZN8Terminal8Emulator6resizeEmm"
+ "params":
+ - "name": "this"
+ "type": "bool "
+ - "name": "s_width"
+ "type": "size_t"
+ - "name": "s_height"
+ "type": "size_t"
+ "return_type": "void"
+ "signature": "void Terminal::Emulator::resize(size_t, size_t)"
+"language": "c++"
+"project": "mosh"
+"target_name": "terminal_parser_fuzzer"
+"target_path": "/src/mosh/src/fuzz/terminal_parser_fuzzer.cc"
diff --git a/benchmark-sets/analyzer-tests-1/quickjs.yaml b/benchmark-sets/analyzer-tests-1/quickjs.yaml
new file mode 100644
index 000000000..75c869f15
--- /dev/null
+++ b/benchmark-sets/analyzer-tests-1/quickjs.yaml
@@ -0,0 +1,19 @@
+"functions":
+- "name": "JS_ParseJSON2"
+ "params":
+ - "name": ""
+ "type": "bool "
+ - "name": ""
+ "type": "bool "
+ - "name": ""
+ "type": "size_t"
+ - "name": ""
+ "type": "bool "
+ - "name": ""
+ "type": "int"
+ "return_type": "void"
+ "signature": "JSValue JS_ParseJSON2(JSContext *, const char *, size_t, const char *, int)"
+"language": "c"
+"project": "quickjs"
+"target_name": "fuzz_regexp"
+"target_path": "/src/quickjs/fuzz/fuzz_regexp.c"
diff --git a/benchmark-sets/analyzer-tests-1/vlc.yaml b/benchmark-sets/analyzer-tests-1/vlc.yaml
new file mode 100644
index 000000000..cc6adcb63
--- /dev/null
+++ b/benchmark-sets/analyzer-tests-1/vlc.yaml
@@ -0,0 +1,13 @@
+"functions":
+- "name": "vlm_New"
+ "params":
+ - "name": "libvlc"
+ "type": "bool "
+ - "name": "psz_vlmconf"
+ "type": "bool "
+ "return_type": "void"
+ "signature": "vlm_t * vlm_New(libvlc_int_t *, const char *)"
+"language": "c"
+"project": "vlc"
+"target_name": "vlc-demux-libfuzzer"
+"target_path": "/src/vlc/test/vlc-demux-libfuzzer.c"
diff --git a/benchmark-sets/analyzer-tests-1/xs.yaml b/benchmark-sets/analyzer-tests-1/xs.yaml
new file mode 100644
index 000000000..4dc3adb99
--- /dev/null
+++ b/benchmark-sets/analyzer-tests-1/xs.yaml
@@ -0,0 +1,11 @@
+"functions":
+- "name": "fxLoadModulesRejected"
+ "params":
+ - "name": "the"
+ "type": "bool "
+ "return_type": "void"
+ "signature": "void fxLoadModulesRejected(txMachine *)"
+"language": "c"
+"project": "xs"
+"target_name": "xst_jsonparse"
+"target_path": "/src/moddable/xs/tools/xstFuzz.c"
diff --git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py
index ddb801a54..2e68cb386 100644
--- a/llm_toolkit/prompt_builder.py
+++ b/llm_toolkit/prompt_builder.py
@@ -827,8 +827,14 @@ def build(self,
self.crash_result.insight)
if self.context_result:
+ context_analyzer_insight = f"""
+ {self.context_result.analysis}
+
+ Here is the source code evidence for this insight.
+ {self.context_result.source_code_evidence}
+ """
priming = priming.replace('CONTEXT_ANALYZER_INSIGHT',
- self.context_result.analysis)
+ context_analyzer_insight)
fix_recommendations = FIX_RECOMMENDATION_HEADER + self.context_result.recommendations
priming = priming.replace('FIX_RECOMMENDATION', fix_recommendations)
@@ -915,6 +921,8 @@ def __init__(self,
AGENT_TEMPLATE_DIR, 'function-analyzer-description.txt')
self.function_analyzer_prompt_template_file = self._find_template(
AGENT_TEMPLATE_DIR, 'function-analyzer-priming.txt')
+ self.function_analyzer_response_file = self._find_template(
+ DEFAULT_TEMPLATE_DIR, 'function-analyzer-response.txt')
def get_instruction(self) -> prompts.Prompt:
"""Constructs a prompt using the templates in |self| and saves it."""
@@ -942,7 +950,7 @@ def get_description(self) -> prompts.Prompt:
return self._prompt
- def build_prompt(self) -> prompts.Prompt:
+ def build_prompt(self, project_dir: str) -> prompts.Prompt:
"""Constructs a prompt using the templates in |self| and saves it."""
if not self.benchmark:
@@ -955,6 +963,7 @@ def build_prompt(self) -> prompts.Prompt:
prompt = prompt.replace('{PROJECT_NAME}', self.benchmark.project)
prompt = prompt.replace('{FUNCTION_SIGNATURE}',
self.benchmark.function_signature)
+ prompt = prompt.replace('{PROJECT_DIR}', project_dir)
# Get the function source
func_source = introspector.query_introspector_function_source(
@@ -980,10 +989,16 @@ def build_prompt(self) -> prompts.Prompt:
references_str = '\n'.join(references)
prompt = prompt.replace('{FUNCTION_REFERENCES}', references_str)
+ prompt = prompt.replace('{RESPONSE_FORMAT}', self.get_response_format())
+
self._prompt.append(prompt)
return self._prompt
+ def get_response_format(self) -> str:
+ """Returns the response format for the function analyzer."""
+ return self._get_template(self.function_analyzer_response_file)
+
def build(self,
example_pair: Optional[list[list[str]]] = None,
project_example_content: Optional[list[list[str]]] = None,
@@ -993,9 +1008,10 @@ def build(self,
project_name: str = '',
function_signature: str = '') -> prompts.Prompt:
- raise NotImplementedError(
- 'FunctionAnalyzerTemplateBuilder.build() should not be called. '
- 'Use build_prompt() instead.')
+ del (example_pair, project_example_content, project_context_content,
+ tool_guides, project_dir, project_name, function_signature)
+
+ return self._prompt
class ContextAnalyzerTemplateBuilder(DefaultTemplateBuilder):
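To make the new placeholders concrete, here is a small sketch of the substitution build_prompt now performs; the template text and values are invented for illustration, and only the placeholder names come from the diff:

    # Hypothetical template showing the {PROJECT_DIR} and {RESPONSE_FORMAT} placeholders.
    template = ('Analyze the function {FUNCTION_SIGNATURE} in project {PROJECT_NAME}.\n'
                'The project source is checked out under {PROJECT_DIR}/.\n'
                '{RESPONSE_FORMAT}')

    prompt = template
    prompt = prompt.replace('{PROJECT_NAME}', 'astc-encoder')
    prompt = prompt.replace(
        '{FUNCTION_SIGNATURE}',
        'void symbolic_to_physical(const block_size_descriptor &, '
        'const symbolic_compressed_block &, uint8_t *)')
    prompt = prompt.replace('{PROJECT_DIR}', '/src/astc-encoder')
    prompt = prompt.replace('{RESPONSE_FORMAT}', 'Return your answer in the XML template.')
    print(prompt)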
diff --git a/prompts/agent/context-analyzer-priming.txt b/prompts/agent/context-analyzer-priming.txt
index 3ff5e63d0..d3f56e1f9 100644
--- a/prompts/agent/context-analyzer-priming.txt
+++ b/prompts/agent/context-analyzer-priming.txt
@@ -2,9 +2,10 @@
You are a professional security engineer developing fuzz targets for the project {PROJECT_NAME}.
The previous fuzz target you developed caused a crash in the project.
However, this crash is NOT feasible and cannot be triggered from the project's external entry points (public functions with no non-test callers).
+Your goal is to work backwards from the functions called by the fuzz target to the project's entry points and determine whether the crash is feasible from those entry points.
+You are to provide a detailed analysis and source code evidence showing that the crash is NOT feasible from any entry point in the program.
We have provided you with the fuzz target that caused the crash, its requirements, the crash stacktrace, an analysis of the crash,
and a set of tools that you can use to navigate the project's codebase.
-Your goal is to deeply analyze the project and the context of the crashing function and provide detailed source code evidence that the crash is NOT feasible from any entry point in the program.
If the crash is indeed not feasible, also provide a recommendation for fixing the fuzz target.
If the crash is feasible, provide source code evidence that shows the sequence of execution from the project's entry point that can cause the crash.
@@ -14,7 +15,7 @@ Follow these steps to determine the feasibility of this crash.
Step 1: Identify the information you need to analyze the crash and the function's context.
This includes:
- - The crashing function called by the fuzz target
+ - The functions involved in the crash trace
- The different functions where it is called from
- The entry points of the project
- The crash stacktrace and analysis
@@ -28,25 +29,49 @@ Use the provided tools (see section) and follow their rules to gather the
- External API using the get_function_implementation tool.
- Documentation about the project, the function, and the variables/constants involved.
-Step 3: Identify the specific input condition that leads to the crash.
-This is the input variable values, global variable values or combination of input and global variable values that cause the crash.
-You can get these states by reviewing:
- - The crash analysis
- - The functions identified in the stack trace
- - The fuzz target
+Step 3: Identify the function called by the fuzz target that led to the crash.
+ - This is the function that is directly called by LLVMFuzzerTestOneInput or other functions in the fuzz target.
+ - Your analysis should show whether the program calls this function the same way it is called in the fuzz driver that caused the crash.
-Step 4: Analyze the project and provide proof, with source code evidence, that the crash cannot be triggered from a the project's entry points (public non-test functions).
- - If the crash is not feasible, this proof should show why the crash conditions cannot be satisfied in the different execution paths.
- - If the crash is feasible, this proof should show how the crash conditions are satisfied when the crashing function is called from the program's entry points.
+For example, if the crash trace is:
+#0 0x7f032757700b in raise
+#1 0x7f0327556858 in abort
+#2 0x7f0327556728 (/lib/x86_64-linux-gnu/libc.so.6+0x22728)
+#3 0x7f0327567fd5 in __assert_fail
+#4 0x55f7ce3d653c in encode_ise
+#5 0x55f7ce3d838c in symbolic_to_physical
+#6 0x55f7ce3c318c in LLVMFuzzerTestOneInput
+#7 0x55f7ce277580 in fuzzer::Fuzzer::ExecuteCallback
+#8 0x55f7ce276da5 in fuzzer::Fuzzer::RunOne
+#9 0x55f7ce278585 in fuzzer::Fuzzer::MutateAndTestOne()
+#10 0x55f7ce279315 in fuzzer::Fuzzer::Loop
+
+The function that led to the crash is `symbolic_to_physical`, not `encode_ise` or `LLVMFuzzerTestOneInput`.
+This is the target function for this analysis.
+
+Step 4: Identify the specific input conditions that led to the crash.
+ - These are the input conditions created by the fuzz driver that directly led to the crash.
+ - These conditions should focus on the input variable values, global variable values, or combinations of both that caused the crash.
+ - In your analysis, you must show how these conditions were necessary for the crash to occur.
+
+Step 5: Identify the call sites of the target function (the function called by the fuzz driver) and all constraints on its input arguments.
+ - These are the locations in the program where the target function is called.
+ - You should show how the input arguments are generated at these call sites and all constraints the program enforces on them.
+
+Step 6: Analyze the project and provide proof, with source code evidence, that the crash cannot be triggered from the project's entry points (public non-test functions).
+ - To obtain proof, identify all the path constraints from the entry points to the target function and show how the input conditions violate these constraints.
+ - If the crash is feasible, you should show a step-by-step execution path from the entry point and the corresponding path constraints that enable the crash.
- DO NOT SPECULATE. Provide source code evidence to back up any statement you make about the crash's feasibility.
-Step 5: If the crash is not feasible, recommend necessary and minimal modifications to the fuzz target to prevent the crash.
- - The modifications should mirror how the crashing function is called in different project locations.
+Step 7: If the crash is not feasible, recommend necessary and minimal modifications to the fuzz target to prevent the crash.
+ - Your recommendations should show what constraints should be added to the fuzz target to prevent the crash.
+ - The recommended modifications should mirror how the crashing function is called in different project locations.
Step 8: MAKE SURE YOU PROVIDE YOUR RESPONSE USING THE report_final_result TOOL.
- - The tool takes three arguments - feasible, analysis and recommendations.
+ - The tool takes four arguments - feasible, analysis, source_code_evidence and recommendations.
- feasible should be True if the crash is feasible, or False otherwise.
- - analysis should contain source code evidence of the crash's feasibility.
+ - analysis should contain your detailed analysis of the crash's feasibility.
+ - source_code_evidence should contain the detailed source code evidence to back your analysis.
- recommendations should only be provided for non-feasible crashes, and should recommend fuzz target modifications to prevent this crash.
diff --git a/prompts/agent/crash_analyzer-priming.txt b/prompts/agent/crash_analyzer-priming.txt
index daf7aa648..c8a849fd6 100644
--- a/prompts/agent/crash_analyzer-priming.txt
+++ b/prompts/agent/crash_analyzer-priming.txt
@@ -17,3 +17,7 @@ Below is relevant project function code:
To help analyze the root cause behind the runtime crash, you can leverage GDB tool and BASH tool to obtain information.
+
+Instructions:
+1. ALWAYS use the provided GDB or BASH tools to locate the program lines mentioned in the crash report.
+2. DO NOT TRY TO ANALYZE OR COUNT THE LINES OF CODE IN THE PROGRAM YOURSELF.
diff --git a/prompts/agent/function-analyzer-priming.txt b/prompts/agent/function-analyzer-priming.txt
index 86d3baa76..f3d00ff67 100644
--- a/prompts/agent/function-analyzer-priming.txt
+++ b/prompts/agent/function-analyzer-priming.txt
@@ -7,25 +7,69 @@ Your goal is to analyze the provided function, its children functions, and its u
We are interested in only the following kinds of requirements.
- - Input requirements that are necessary to prevent program crashes.
+ - Input requirements that are necessary to prevent crashes in the target function.
* Program crashes can be caused by assertion failures, invalid array indexing, out-of-bound memory accesses, pointer dereferencing failures.
- Requirements for creating valid input arguments.
- * Here, you should mention what existing function or functions should be used to create a valid input argument.
- * For example, if a function takes in an integer argument but uses that argument as a file descriptor for reading a fil (eg the read function), then it implies the integer must have been returned by another function that creates a file descriptor (eg the open function).Add commentMore actions
- * Similarly, if a function takes in a character pointer and uses it like a file path or name, then this implies a valid file should be created and the path or name passed to this function.
- * Also, if a function takes in a pointer argument and uses that argument as an argument to strlen or strcpy or other string handling function, this implies the function expects a null-terminated string.
+ * What existing function or functions should be used to create valid input arguments.
+ * This should be informed by how these arguments are created in the target function's callers.
+ * For example, if a function uses an integer argument as a file descriptor when reading a file (eg the read function), you should check what functions the callers use to create valid file descriptors (eg the open function).
+ - Requirements for variable argument lists in variadic functions
+ * What factors influence the number and type of arguments in these functions?
+ * How do the callers determine and initialize the arguments for these functions?
- Relationship between inputs
* For example, this can be the relationship between a pointer and an integer argument representing its size.
- Input variables that should be fuzzed
- * What input variables can be user-controlled or contain invalid values?
- * For example, if a function parses or processes one of its input arguments, then that argument is fuzzable.
+ * What input variables can contain user-controlled input or invalid values at caller sites?
+ * What input arguments always receive fixed, valid, or constrained values at the caller sites?
- Setup functions to call before the target function can be called.
- * This is the function or set of functions we must call before calling the targte function.
- * For example, if a function depends on a global variable which is set by another function, this may imply we need to call that function before the target function.
+ * This is the function or set of functions that are necessary to initialize the state of the target function.
+ * For example, if a function depends on a global variable which is set by another function, we need to call that function before the target function.
Keep each requirement concise. Each requirement should contain two sentences. The first is the requirement. The second is a brief reason why it is important.
+
+Step 1: Collect the information you need to analyze the input requirements of a function.
+The project source code directory is `{PROJECT_DIR}/` and was cloned from the project repository.
+This includes:
+ - The implementation of the target function
+ - The call sites of the target function
+ - The implementations of the target function's callers
+
+Step 2: Identify the list of input parameters for the target function
+
+Step 3: Analyze how each input parameter is generated in the function's callers.
+ - Is the input parameter always created or initialized using a specific function?
+ - Can the input parameter be user-controlled or contain invalid values?
+ - Are there clear relationships between the input arguments passed to the function?
+
+Step 4: Identify the implicit requirements of the parameters in the target function.
+Examples include:
+ - Implicit assumptions about pointer states (null pointers, function pointers, null-terminated string pointers, etc.)?
+ - Implicit assumptions about the range of integer variables or struct members?
+ - Implicit assumptions about the number or types of variable arguments?
+
+Step 5: Based on your analysis, specify how each input parameter should be initialized in the requirements.
+Options include the following:
+ - Initialized using a specific function (eg fopen)
+ - Initialized using values from an array in the program
+ - Initialized and constrained to a valid set of values, based on possible values the callers can provide
+ - Initialized and constrained based on the target function's implicit requirements
+ - Initialized using fuzz inputs
+
+Step 6: Specify any functions that should be called before the target function.
+This includes:
+ - Functions that set the correct state for the target function
+ - Functions that allocate or free necessary resources
+
+Step 7: Compile your results into a list of requirements for the function.
+ - Each requirement should be short and precise.
+ - If a requirement contains a constant, you should check the codebase and include the value of the constant as well.
+ - If an input parameter is a struct or struct pointer, mention if the struct should be created as a whole or how each member should be initialized.
+ - Give a concise reason for including each requirement.
+
+
+
Here is the provided data.
@@ -36,38 +80,7 @@ Here is the provided data.
{FUNCTION_REFERENCES}
-
-You MUST return your response in the format below.
-
-
-Make sure your response follows the following format.
-
-
-
-project name: the name of the project provided
-function signature: The function's signature
-
-
-
-A summary of what the function does.
-
-
-
-
-First requirement
-
-
-Second requirement
-
-...
-
-nth requirement
-
-
-
-
-
-
+{RESPONSE_FORMAT}
@@ -104,3 +117,8 @@ You will be provided with the following tools.
1. get_function_implementation: This is a tool you can use to retrieve the implementation of a function using the project's name and function's name as arguments.
2. search_project_files: This is an interactive tool you can use to search the project's source file using bash commands and find definitions or usages of functions, classes, structs, and variables.
The usage guide for the Bash Tool is provided below.
+
+
+1. If the target function takes in a callback or function pointer as an argument, provide instructions for creating this callback correctly based on how similar callbacks are created in the project.
+ This should include what the callback should do, and what memory resources it should create or free.
+
diff --git a/prompts/template_xml/context-analyzer-response.txt b/prompts/template_xml/context-analyzer-response.txt
index 17625098c..d0b86a1df 100644
--- a/prompts/template_xml/context-analyzer-response.txt
+++ b/prompts/template_xml/context-analyzer-response.txt
@@ -2,4 +2,5 @@ YOU MUST PROVIDE YOUR FINAL RESPONSE USING THE report_final_result TOOL.
- - The tool takes three arguments - feasible, analysis and recommendations.
+ - The tool takes four arguments - feasible, analysis, source_code_evidence and recommendations.
- feasible should be True if the crash is feasible, or False otherwise.
- - analysis should contain source code evidence of the crash's feasibility.
+ - analysis should contain your detailed analysis of the crash's feasibility.
+ - source_code_evidence should contain the detailed source code evidence to back your analysis.
- recommendations should only be provided for non-feasible crashes, and should recommend fuzz target modifications to prevent this crash.
diff --git a/prompts/template_xml/function-analyzer-response.txt b/prompts/template_xml/function-analyzer-response.txt
new file mode 100644
index 000000000..e0ff5b377
--- /dev/null
+++ b/prompts/template_xml/function-analyzer-response.txt
@@ -0,0 +1,26 @@
+You MUST return your response in the format below.
+
+
+
+project name: the name of the project provided
+function signature: The function's signature
+
+
+
+A summary of what the function does.
+
+
+
+
+First requirement
+
+
+Second requirement
+
+...
+
+nth requirement
+
+
+
+
\ No newline at end of file
diff --git a/results.py b/results.py
index e9aa4ad4a..c8ba6bd73 100644
--- a/results.py
+++ b/results.py
@@ -310,20 +310,24 @@ class CrashContextResult():
"""Analysis result of the context of the crashing function."""
feasible: bool
analysis: str
+ source_code_evidence: str
recommendations: str
def __init__(self,
feasible: bool = False,
analysis: str = '',
+ source_code_evidence: str = '',
recommendations: str = ''):
self.feasible = feasible
self.analysis = analysis
+ self.source_code_evidence = source_code_evidence
self.recommendations = recommendations
def to_dict(self) -> dict:
return {
'feasible': self.feasible,
'analysis': self.analysis,
+ 'source_code_evidence': self.source_code_evidence,
'recommendations': self.recommendations,
}
@@ -337,6 +341,8 @@ def from_dict(data: Any) -> Optional['CrashContextResult']:
return CrashContextResult(feasible=data.get('feasible', False),
analysis=data.get('analysis', ''),
+ source_code_evidence=data.get(
+ 'source_code_evidence', ''),
recommendations=data.get('recommendations', ''))
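A quick round-trip check of the extended CrashContextResult, assuming from_dict accepts a dict containing all four fields (the example strings are invented):

    from results import CrashContextResult

    data = {
        'feasible': False,
        'analysis': 'The crash needs a zero-length buffer, which callers never pass.',
        'source_code_evidence': 'Every call site checks len > 0 before calling the target.',
        'recommendations': 'Reject empty inputs in the fuzz target before the API call.',
    }
    result = CrashContextResult.from_dict(data)
    assert result is not None
    # to_dict() now includes source_code_evidence alongside the original fields.
    assert result.to_dict() == data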
diff --git a/stage/analysis_stage.py b/stage/analysis_stage.py
index 68b02daf2..d7c339c21 100644
--- a/stage/analysis_stage.py
+++ b/stage/analysis_stage.py
@@ -15,7 +15,7 @@
stage is responsible for categorizing run-time crashes and detecting untested
code blocks."""
-from results import Result, RunResult
+from results import AnalysisResult, Result, RunResult
from stage.base_stage import BaseStage
@@ -39,10 +39,17 @@ def execute(self, result_history: list[Result]) -> Result:
agent = self.get_agent(agent_name='CrashAnalyzer')
agent_result = self._execute_agent(agent, result_history)
self.logger.write_chat_history(agent_result)
- result_history.append(agent_result)
+ # If it's a true bug, save the result and execute the ContextAnalyzer.
+ if (isinstance(agent_result, AnalysisResult) and
+ agent_result.crash_result and agent_result.crash_result.true_bug):
+ result_history.append(agent_result)
+ # Then, execute the ContextAnalyzer agent to analyze the crash.
+ agent = self.get_agent(agent_name='ContextAnalyzer')
+ else:
+ self.logger.debug('Analysis stage completed with result:\n%s',
+ agent_result)
+ return agent_result
- # Then, execute the Prototyper agent to refine the fuzz target.
- agent = self.get_agent(agent_name='ContextAnalyzer')
except RuntimeError:
agent = self.get_agent(agent_name='SemanticAnalyzer')
else: