From 5387cb74ee2d672b84821866f8ec58a7193644c2 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Mon, 7 Jul 2025 21:03:40 +0000 Subject: [PATCH 01/12] Created and integrated a context analyzer to oss-fuzz-gen --- agent/base_agent.py | 12 +- agent/context_analyzer.py | 304 ++++++++++++++++++ common/cloud_builder.py | 7 +- experimental/build_generator/llm_agent.py | 9 +- llm_toolkit/prompt_builder.py | 115 ++++++- .../agent/context-analyzer-description.txt | 1 + .../agent/context-analyzer-instruction.txt | 1 + prompts/agent/context-analyzer-priming.txt | 72 +++++ .../context-analyzer-response.txt | 16 + results.py | 42 ++- run_one_experiment.py | 5 + stage/analysis_stage.py | 6 + 12 files changed, 577 insertions(+), 13 deletions(-) create mode 100644 agent/context_analyzer.py create mode 100644 prompts/agent/context-analyzer-description.txt create mode 100644 prompts/agent/context-analyzer-instruction.txt create mode 100644 prompts/agent/context-analyzer-priming.txt create mode 100644 prompts/template_xml/context-analyzer-response.txt diff --git a/agent/base_agent.py b/agent/base_agent.py index 4ca933843..29b7a260f 100644 --- a/agent/base_agent.py +++ b/agent/base_agent.py @@ -167,9 +167,12 @@ def _container_handle_bash_command(self, response: str, tool: BaseTool, prompt.append(prompt_text) return prompt - def _container_handle_invalid_tool_usage(self, tools: list[BaseTool], - cur_round: int, response: str, - prompt: Prompt) -> Prompt: + def _container_handle_invalid_tool_usage(self, + tools: list[BaseTool], + cur_round: int, + response: str, + prompt: Prompt, + extra: str = '') -> Prompt: """Formats a prompt to re-teach LLM how to use the |tool|.""" logger.warning('ROUND %02d Invalid response from LLM: %s', cur_round, @@ -180,6 +183,9 @@ def _container_handle_invalid_tool_usage(self, tools: list[BaseTool], for tool in tools: prompt_text += f'{tool.tutorial()}\n\n' prompt.append(prompt_text) + # We add any additional information to the prompt. 
+ if extra: + prompt.append(extra) return prompt def _container_handle_bash_commands(self, response: str, tool: BaseTool, diff --git a/agent/context_analyzer.py b/agent/context_analyzer.py new file mode 100644 index 000000000..f6d64350e --- /dev/null +++ b/agent/context_analyzer.py @@ -0,0 +1,304 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +An LLM agent to analyze a function and identify its implicit requirements. +The results of this analysis will be used by the writer agents to +generate correct fuzz target for the function. +""" + +import argparse + +import logger +import results as resultslib +from agent import base_agent +from data_prep import introspector +from experiment import benchmark as benchmarklib +from experiment.workdir import WorkDirs +from llm_toolkit import models, prompt_builder, prompts +from tool import container_tool + + +class ContextAnalyzer(base_agent.ADKBaseAgent): + """An LLM agent to analyze the feasibility of crashes. 
+ """ + + def __init__(self, + trial: int, + llm: models.LLM, + args: argparse.Namespace, + benchmark: benchmarklib.Benchmark, + name: str = ''): + + builder = prompt_builder.ContextAnalyzerTemplateBuilder(llm, benchmark) + + description = builder.get_description().get() + + instruction = builder.get_instruction().get() + + tools = [self.get_function_implementation, self.search_project_files] + + super().__init__(trial, llm, args, benchmark, description, instruction, + tools, name) + + self.project_functions = None + + def write_requirements_to_file(self, args, requirements: str) -> str: + """Write the requirements to a file.""" + if not requirements: + logger.warning('No requirements to write to file.', trial=self.trial) + return '' + + requirement_path = args.work_dirs.requirements_file_path(self.trial) + + with open(requirement_path, 'w') as f: + f.write(requirements) + + logger.info('Requirements written to %s', + requirement_path, + trial=self.trial) + + return requirement_path + + def validate_llm_response(self, response: str) -> bool: + """Validate the LLM response for the required tags.""" + if not response: + logger.error('LLM response is empty.', trial=self.trial) + return False + + feasible = self._parse_tag(response, 'feasible') + analysis = self._parse_tag(response, 'analysis') + recommendations = self._parse_tag(response, 'recommendations') + + if not feasible or feasible.strip() not in ['False', 'True']: + logger.error('LLM response does not contain a valid "feasible" tag.', + trial=self.trial) + return False + + if not analysis: + logger.error('LLM response does not contain an "analysis" tag.', + trial=self.trial) + return False + + if feasible.strip() == 'False' and not recommendations: + logger.error('LLM response does not provide any recommendation for a non-feasible crash.', + trial=self.trial) + return False + + return True + + def handle_llm_response( + self, final_response_text: str, + result: resultslib.CrashContextResult) -> prompts.Prompt: 
+ """Handle the LLM response and update the result.""" + + feasible = self._parse_tag(final_response_text, 'feasible') + analysis = self._parse_tag(final_response_text, 'analysis') + + template_builder = prompt_builder.ContextAnalyzerTemplateBuilder( + self.llm, self.benchmark) + + if not self.validate_llm_response(final_response_text): + return self._container_handle_invalid_tool_usage( + [self.inspect_tool], self.round, final_response_text, + template_builder.build(), template_builder.get_response_format()) + + if feasible == 'False': + result.feasible = False + elif feasible == 'True': + result.feasible = True + else: + logger.error('Unexpected conclusion from LLM response: %s.', + feasible, + trial=self.trial) + + result.analysis = analysis + + recommendations = self._parse_tag(final_response_text, 'recommendations') + if recommendations: + result.recommendations = recommendations.strip() + + return template_builder.build() + + def execute(self, + result_history: list[resultslib.Result]) -> resultslib.Result: + """Execute the agent with the given results.""" + + WorkDirs(self.args.work_dirs.base, keep=True) + logger.info('Executing %s', self.name, trial=self.trial) + + last_result = result_history[-1] + + if not isinstance( + last_result, resultslib.AnalysisResult) or not last_result.crash_result: + logger.error( + f'Expected last result to be AnalysisResult, got {type(last_result)}.', + trial=self.trial) + return last_result + + context_result = resultslib.CrashContextResult() + + # Initialize the ProjectContainerTool for local file search + self.inspect_tool = container_tool.ProjectContainerTool(self.benchmark) + self.inspect_tool.compile(extra_commands=' && rm -rf /out/* > /dev/null') + + # Call the agent asynchronously and return the result + prompt = self._initial_prompt(result_history) + + if not prompt or not prompt.get(): + logger.error('Failed to build initial prompt for FunctionAnalyzer.', + trial=self.trial) + return last_result + + while self.round 
< self.max_round and prompt.get(): + + final_response_text = self.chat_llm(self.round, + client=None, + prompt=prompt, + trial=result_history[-1].trial) + + prompt = self.handle_llm_response(final_response_text, context_result) + + self.inspect_tool.terminate() + + analysis_result = resultslib.AnalysisResult( + author=self, + run_result=last_result.run_result, + crash_result=last_result.crash_result, + crash_context_result=context_result, + chat_history={self.name: last_result.crash_result.to_dict()}) + + return analysis_result + + def _initial_prompt(self, results: list[resultslib.Result]) -> prompts.Prompt: + """Create the initial prompt for the agent.""" + + last_result = results[-1] + + # Initialize the prompt builder + builder = prompt_builder.ContextAnalyzerTemplateBuilder( + self.llm, self.benchmark) + + if isinstance(last_result, + resultslib.AnalysisResult) and last_result.crash_result: + function_requirements = self.get_function_requirements() + prompt = builder.build_context_analysis_prompt( + last_result, function_requirements, self.inspect_tool.tutorial(), + self.inspect_tool.project_dir) + else: + logger.error( + f'Unexpected result type {type(last_result)} ' + 'or no last build result found.', + trial=self.trial) + prompt = prompts.TextPrompt() + + return prompt + + def search_project_files(self, request: str) -> str: + """ + This function tool uses bash commands to search the project's source files, + and retrieve requested code snippets or file contents. + Args: + request (str): The bash command to execute and its justification, + formatted using the and tags. + Returns: + str: The response from executing the bash commands, + formatted using the , and tags. + """ + + self.log_llm_response(request) + + prompt = prompt_builder.DefaultTemplateBuilder(self.llm, None).build([]) + + if request: + prompt = self._container_handle_bash_commands(request, self.inspect_tool, + prompt) + + # Finally check invalid request. 
+ if not request or not prompt.get(): + prompt = self._container_handle_invalid_tool_usage([self.inspect_tool], 0, + request, prompt) + + tool_response = prompt.get() + + self.log_llm_prompt(tool_response) + + return tool_response + + def get_function_implementation(self, project_name: str, + function_name: str) -> str: + """ + Retrieves a function's source from the fuzz introspector API, + using the project's name and function's name + + Args: + project_name (str): The name of the project. + function_name (str): The name of the function. + + Returns: + str: Source code of the function if found, otherwise an empty string. + """ + request = f""" + Requesting implementation for the function: + Function name: {function_name} + Project name: {project_name} + """ + + self.log_llm_response(request) + + if self.project_functions is None: + logger.info( + 'Project functions not initialized. Initializing for project "%s".', + project_name, + trial=self.trial) + functions_list = introspector.query_introspector_all_functions( + project_name) + + if functions_list: + self.project_functions = { + func['debug_summary']['name']: func + for func in functions_list + if isinstance(func.get('debug_summary'), dict) and + isinstance(func['debug_summary'].get('name'), str) and + func['debug_summary']['name'].strip() + } + else: + self.project_functions = None + + response = f""" + Project name: {project_name} + Function name: {function_name} + """ + function_source = '' + + if self.project_functions: + function_dict = self.project_functions.get(function_name, {}) + function_signature = function_dict.get('function_signature', '') + + function_source = introspector.query_introspector_function_source( + project_name, function_signature) + + if function_source.strip(): + response += f""" + Function source code: + {function_source} + """ + else: + logger.error('Error: Function with name "%s" not found in project "%s".', + function_name, + project_name, + trial=self.trial) + + 
self.log_llm_prompt(response) + + return response \ No newline at end of file diff --git a/common/cloud_builder.py b/common/cloud_builder.py index 58d1e05f3..58719f1d1 100644 --- a/common/cloud_builder.py +++ b/common/cloud_builder.py @@ -482,7 +482,12 @@ def _update_experiment_directory(self, experiment_path: str, with tempfile.TemporaryDirectory() as tmpdirname: temp_dest_path = os.path.join(tmpdirname, os.path.basename(new_experiment_url)) - self._download_from_gcs(temp_dest_path) + try: + self._download_from_gcs(temp_dest_path) + except NotFound as e: + logging.error('Failed to download new experiment archive from %s: %s', + new_experiment_url, e) + return # Extract the downloaded archive, without replacing any existing files. tar_command = [ diff --git a/experimental/build_generator/llm_agent.py b/experimental/build_generator/llm_agent.py index 14426b6a8..1aa23b32d 100644 --- a/experimental/build_generator/llm_agent.py +++ b/experimental/build_generator/llm_agent.py @@ -458,9 +458,12 @@ def _initial_prompt(self, results: list[Result]) -> Prompt: return prompt - def _container_handle_invalid_tool_usage(self, tools: list[BaseTool], - cur_round: int, response: str, - prompt: Prompt) -> Prompt: + def _container_handle_invalid_tool_usage(self, + tools: list[BaseTool], + cur_round: int, + response: str, + prompt: Prompt, + extra: str = '') -> Prompt: """Formats a prompt to re-teach LLM how to use the |tool|.""" # pylint: disable=unused-argument diff --git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py index 4ea4acdab..c0dc5f87b 100644 --- a/llm_toolkit/prompt_builder.py +++ b/llm_toolkit/prompt_builder.py @@ -28,7 +28,8 @@ from experiment.benchmark import Benchmark, FileType from experiment.fuzz_target_error import SemanticCheckResult from llm_toolkit import models, prompts -from results import BuildResult, CoverageResult, RunResult +from results import AnalysisResult, BuildResult, CoverageResult, RunResult +from tool.base_tool import BaseTool 
logger = logging.getLogger(__name__) @@ -1003,6 +1004,118 @@ def build(self, 'Use build_prompt() instead.') +class ContextAnalyzerTemplateBuilder(DefaultTemplateBuilder): + """Builder for function analyzer.""" + + def __init__(self, + model: models.LLM, + benchmark: Optional[Benchmark] = None, + template_dir: str = DEFAULT_TEMPLATE_DIR, + initial: Any = None): + super().__init__(model, benchmark, template_dir, initial) + + # Load templates. + self.context_analyzer_instruction_file = self._find_template( + AGENT_TEMPLATE_DIR, 'context-analyzer-instruction.txt') + self.context_analyzer_description_file = self._find_template( + AGENT_TEMPLATE_DIR, 'context-analyzer-description.txt') + self.context_analyzer_prompt_template_file = self._find_template( + AGENT_TEMPLATE_DIR, 'context-analyzer-priming.txt') + self.context_analyzer_response_file = self._find_template( + DEFAULT_TEMPLATE_DIR, 'context-analyzer-response.txt') + + def get_instruction(self) -> prompts.Prompt: + """Constructs a prompt using the templates in |self| and saves it.""" + + self._prompt = self._model.prompt_type()(None) + if not self.benchmark: + return self._prompt + + prompt = self._get_template(self.context_analyzer_instruction_file) + + self._prompt.append(prompt) + + return self._prompt + + def get_description(self) -> prompts.Prompt: + """Constructs a prompt using the templates in |self| and saves it.""" + + self._prompt = self._model.prompt_type()(None) + if not self.benchmark: + return self._prompt + + prompt = self._get_template(self.context_analyzer_description_file) + + self._prompt.append(prompt) + + return self._prompt + + def build_context_analysis_prompt(self, + last_result: AnalysisResult, + function_requirements: str, + tool_guides: str = '', + project_dir: str = '') -> prompts.Prompt: + """Constructs a prompt using the templates in |self| and saves it.""" + + if not self.benchmark: + logger.error( + 'No benchmark provided for function analyzer template builder.') + return 
self._prompt + + prompt = self._get_template(self.context_analyzer_prompt_template_file) + + prompt = prompt.replace('{PROJECT_NAME}', self.benchmark.project) + prompt = prompt.replace('{PROJECT_DIR}', project_dir) + + # Add the function source + func_source = introspector.query_introspector_function_source( + self.benchmark.project, self.benchmark.function_signature) + + if not func_source: + logger.error('No function source found for project: %s, function: %s', + self.benchmark.project, self.benchmark.function_signature) + + crash_result = last_result.crash_result + run_result = last_result.run_result + + if not crash_result or not run_result: + logger.error('No crash or run result found for project: %s, function: %s', + self.benchmark.project, self.benchmark.function_signature) + return self._prompt + + # Add the fuzz driver and crash results + prompt = prompt.replace('{FUZZ_DRIVER}', run_result.fuzz_target_source) + prompt = prompt.replace('{CRASH_ANALYSIS}', crash_result.insight) + prompt = prompt.replace('{CRASH_STACKTRACE}', crash_result.stacktrace) + + # Add the function requirements + prompt = prompt.replace('{FUNCTION_REQUIREMENTS}', function_requirements) + + response_format = self._get_template(self.context_analyzer_response_file) + + if response_format: + prompt = prompt.replace('{RESPONSE_FORMAT}', response_format) + + self._prompt.append(prompt) + self._prompt.append(tool_guides) + + return self._prompt + + def get_response_format(self) -> str: + """Returns the response format for the context analyzer.""" + return self._get_template(self.context_analyzer_response_file) + + def build(self, + example_pair: Optional[list[list[str]]] = None, + project_example_content: Optional[list[list[str]]] = None, + project_context_content: Optional[dict] = None, + tool_guides: str = '', + project_dir: str = '', + project_name: str = '', + function_signature: str = '') -> prompts.Prompt: + + return self._prompt + class 
CrashAnalyzerTemplateBuilder(DefaultTemplateBuilder): """Builder for C/C++.""" diff --git a/prompts/agent/context-analyzer-description.txt b/prompts/agent/context-analyzer-description.txt new file mode 100644 index 000000000..b8c82e613 --- /dev/null +++ b/prompts/agent/context-analyzer-description.txt @@ -0,0 +1 @@ +An agent to analyze the context of a crashing function in a project and determine the feasibility of a crash. \ No newline at end of file diff --git a/prompts/agent/context-analyzer-instruction.txt b/prompts/agent/context-analyzer-instruction.txt new file mode 100644 index 000000000..e4c59c9b6 --- /dev/null +++ b/prompts/agent/context-analyzer-instruction.txt @@ -0,0 +1 @@ +You are a security engineer tasked with analyzing the context of a crashing function within a project and determining the feasibility of a reported crash from the project's external functions. \ No newline at end of file diff --git a/prompts/agent/context-analyzer-priming.txt b/prompts/agent/context-analyzer-priming.txt new file mode 100644 index 000000000..b88f0357a --- /dev/null +++ b/prompts/agent/context-analyzer-priming.txt @@ -0,0 +1,72 @@ + +You are a professional security engineer developing fuzz drivers for the project {PROJECT_NAME}. +The previous fuzz driver you developed caused a crash in the project. +However, this crash is NOT feasible and cannot be triggered from the project's external entry points (public functions with no non-test callers). +We have provided you with the fuzz driver that caused the crash, its requirements, the crash stacktrace, an analysis of the crash, +and a set of tools that you can use to navigate the project's codebase. +Your goal is to deeply analyze the project and the context of the crashing function and provide detailed source code evidence that the crash is NOT feasible from any entry point in the program. +If the crash is indeed not feasible, also provide a recommendation for fixing the fuzz driver. 
+If the crash is feasible, provide source code evidence that shows the sequence of execution from the project's entry point that can cause the crash. + + + +Follow these steps to determine the feasibility of this crash. + +Step 1: Identify the information you need to analyze the crash and the function's context. +This includes: + - The crashing function called by the fuzz driver + - The different functions where it is called from + - The entry points of the project + - The crash stacktrace and analysis + - The state of input and global variables that lead to the crash + - Locations in the code where those input variables could have been initialized + - Execution paths between the input variable initializations and the crashing function + +Step 2: Collect information using the provided tools +Use the provided tools (see section) and follow their rules to gather the necessary information. You can collect information from: + - The project source code directory `{PROJECT_DIR}/` cloned from the project repository. + - External API using the get_function_implementation tool. + - Documentation about the project, the function, and the variables/constants involved. + +Step 3: Identify the specific input condition that leads to the crash. +This is the input variable values, global variable values or combination of input and global variable values that cause the crash. +You can get these states by reviewing: + - The crash analysis + - The functions identified in the stack trace + - The fuzz driver + +Step 4: Analyze the project and provide proof, with source code evidence, that the crash cannot be triggered from the project's entry points (public non-test functions). + - If the crash is not feasible, this proof should show why the crash conditions cannot be satisfied in the different execution paths. + - If the crash is feasible, this proof should show how the crash conditions are satisfied when the crashing function is called from the program's entry points. 
+ - DO NOT SPECULATE. Provide source code evidence to back up any statement you make about the crash's feasibility. + +Step 5: If the crash is not feasible, recommend necessary and minimal modifications to the fuzz driver to prevent the crash. + - The modifications should mirror how the crashing function is called in different project locations. + +Step 6: MAKE SURE YOU PROVIDE YOUR RESPONSE IN THE REQUIRED FORMAT BELOW. + + +Here is the provided data. + + +{FUZZ_DRIVER} + + + +{FUNCTION_REQUIREMENTS} + + + +{CRASH_STACKTRACE} + + + +{CRASH_ANALYSIS} + + +{RESPONSE_FORMAT} + +You will be provided with the following tools. +1. get_function_implementation: This is a tool you can use to retrieve the implementation of a function using the project's name and function's name as arguments. +2. search_project_files: This is an interactive tool you can use to search the project's source file using bash commands and find definitions or usages of functions, classes, structs, and variables. + The usage guide for the Bash Tool is provided below. \ No newline at end of file diff --git a/prompts/template_xml/context-analyzer-response.txt b/prompts/template_xml/context-analyzer-response.txt new file mode 100644 index 000000000..563e590dc --- /dev/null +++ b/prompts/template_xml/context-analyzer-response.txt @@ -0,0 +1,16 @@ +You MUST return your final response in the format below. + + +Make sure your response follows the following format. 
+ + +'True' if the crash can be triggered from a project's external entry points, 'False' otherwise + + +Your analysis of the feasibility of this crash + + +Recommendations for modifying the fuzz driver to prevent the crash if the crash is not feasible + + + \ No newline at end of file diff --git a/results.py b/results.py index 69c17c03c..ecab2c011 100644 --- a/results.py +++ b/results.py @@ -306,6 +306,24 @@ def __repr__(self) -> str: return f'{self.__class__.__name__}({", ".join(attributes)})' +class CrashContextResult(): + """Analysis result of the context of the crashing function.""" + feasible: bool + analysis: str + recommendations: str + + def __init__(self, feasible: bool = False, analysis: str = '', recommendations: str = ''): + self.feasible = feasible + self.analysis = analysis + self.recommendations = recommendations + + def to_dict(self) -> dict: + return { + 'feasible': self.feasible, + 'analysis': self.analysis, + 'recommendations': self.recommendations, + } + # TODO: Make this class an attribute of Result, avoid too many attributes in one # class. 
class AnalysisResult(Result): @@ -313,6 +331,7 @@ class AnalysisResult(Result): run_result: RunResult semantic_result: Optional[SemanticCheckResult] crash_result: Optional[CrashResult] + crash_context_result: Optional[CrashContextResult] coverage_result: Optional[CoverageResult] def __init__(self, @@ -320,6 +339,7 @@ def __init__(self, run_result: RunResult, semantic_result: Optional[SemanticCheckResult] = None, crash_result: Optional[CrashResult] = None, + crash_context_result: Optional[CrashContextResult] = None, coverage_result: Optional[CoverageResult] = None, chat_history: Optional[dict] = None, default_success: bool = False) -> None: @@ -330,6 +350,7 @@ def __init__(self, self.run_result = run_result self.semantic_result = semantic_result self.crash_result = crash_result + self.crash_context_result = crash_context_result self.coverage_result = coverage_result def to_dict(self) -> dict: @@ -338,6 +359,8 @@ def to_dict(self) -> dict: self.semantic_result.to_dict() if self.semantic_result else {}, 'crash_result': self.crash_result.to_dict() if self.crash_result else {}, + 'crash_context_result': + self.crash_context_result.to_dict() if self.crash_context_result else {}, 'coverage_result': self.coverage_result.to_dict() if self.coverage_result else {}, } @@ -350,6 +373,10 @@ def success(self) -> bool: return not self.semantic_result.has_err if self.coverage_result: return not self.coverage_result.improve_required + if self.crash_result: + return self.crash_result.true_bug + if self.crash_context_result: + return self.crash_context_result.feasible return False @property @@ -416,9 +443,11 @@ def best_analysis_result(self) -> Optional[AnalysisResult]: # 1. 
Crashed for a non-semantic error for result in self.result_history[::-1]: #TODO(dongge): Refine this logic for coverage - if (isinstance(result, AnalysisResult) and result.crashes and - result.crash_result and result.crash_result.true_bug): - return result + if (isinstance(result, AnalysisResult) and result.crashes): + if result.crash_context_result and result.crash_context_result.feasible: + return result + elif result.crash_result and result.crash_result.true_bug: + return result # 2. Crashed for result in self.result_history[::-1]: @@ -588,8 +617,11 @@ def log_path(self) -> str: def is_semantic_error(self) -> bool: """Validates if the best AnalysisResult has semantic error.""" result = self.best_analysis_result - if result and result.crash_result: - return not result.crash_result.true_bug + if result: + if result.crash_context_result: + return not result.crash_context_result.feasible + elif result.crash_result: + return not result.crash_result.true_bug return False @property diff --git a/run_one_experiment.py b/run_one_experiment.py index 051c89dff..3517d6365 100644 --- a/run_one_experiment.py +++ b/run_one_experiment.py @@ -22,6 +22,7 @@ from multiprocessing import pool from typing import List, Optional +from agent.context_analyzer import ContextAnalyzer import logger import pipeline from agent.coverage_analyzer import CoverageAnalyzer @@ -287,6 +288,10 @@ def _fuzzing_pipeline(benchmark: Benchmark, model: models.LLM, llm=model, args=args), CrashAnalyzer(trial=trial, llm=model, args=args), + ContextAnalyzer(trial=trial, + llm=model, + args=args, + benchmark=benchmark), ]) else: writer_agents = [] diff --git a/stage/analysis_stage.py b/stage/analysis_stage.py index 101daaf67..68b02daf2 100644 --- a/stage/analysis_stage.py +++ b/stage/analysis_stage.py @@ -37,6 +37,12 @@ def execute(self, result_history: list[Result]) -> Result: if last_result.crashes: try: agent = self.get_agent(agent_name='CrashAnalyzer') + agent_result = self._execute_agent(agent, 
result_history) + self.logger.write_chat_history(agent_result) + result_history.append(agent_result) + + # Then, execute the Prototyper agent to refine the fuzz target. + agent = self.get_agent(agent_name='ContextAnalyzer') except RuntimeError: agent = self.get_agent(agent_name='SemanticAnalyzer') else: From b93f9b3fe50f852b6b79bb03ae8fd08d2ac2e1d1 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Mon, 7 Jul 2025 21:09:06 +0000 Subject: [PATCH 02/12] Presubmit fixes --- agent/context_analyzer.py | 7 ++++--- llm_toolkit/prompt_builder.py | 6 +++++- results.py | 13 +++++++++---- run_one_experiment.py | 2 +- 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/agent/context_analyzer.py b/agent/context_analyzer.py index f6d64350e..6dffee55d 100644 --- a/agent/context_analyzer.py +++ b/agent/context_analyzer.py @@ -91,8 +91,9 @@ def validate_llm_response(self, response: str) -> bool: return False if feasible.strip() == 'False' and not recommendations: - logger.error('LLM response does not provide any recommendation for a non-feasible crash.', - trial=self.trial) + logger.error( + 'LLM response does not provide any recommendation for a non-feasible crash.', + trial=self.trial) return False return True @@ -301,4 +302,4 @@ def get_function_implementation(self, project_name: str, self.log_llm_prompt(response) - return response \ No newline at end of file + return response diff --git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py index c0dc5f87b..3ef289114 100644 --- a/llm_toolkit/prompt_builder.py +++ b/llm_toolkit/prompt_builder.py @@ -29,7 +29,6 @@ from experiment.fuzz_target_error import SemanticCheckResult from llm_toolkit import models, prompts from results import AnalysisResult, BuildResult, CoverageResult, RunResult -from tool.base_tool import BaseTool logger = logging.getLogger(__name__) @@ -842,6 +841,7 @@ def build(self, if function_requirements: requirements = (f'\nHere are the requirements for the function.\n' 
f'{function_requirements}\n') + problem += requirements self._prepare_prompt(priming, problem) return self._prompt @@ -1113,9 +1113,13 @@ def build(self, project_dir: str = '', project_name: str = '', function_signature: str = '') -> prompts.Prompt: + """Returns an empty prompt.""" + + del (example_pair, project_example_content, project_context_content) return self._prompt + class CrashAnalyzerTemplateBuilder(DefaultTemplateBuilder): """Builder for C/C++.""" diff --git a/results.py b/results.py index ecab2c011..84ec9ebd3 100644 --- a/results.py +++ b/results.py @@ -312,7 +312,10 @@ class CrashContextResult(): analysis: str recommendations: str - def __init__(self, feasible: bool = False, analysis: str = '', recommendations: str = ''): + def __init__(self, + feasible: bool = False, + analysis: str = '', + recommendations: str = ''): self.feasible = feasible self.analysis = analysis self.recommendations = recommendations @@ -324,6 +327,7 @@ def to_dict(self) -> dict: 'recommendations': self.recommendations, } + # TODO: Make this class an attribute of Result, avoid too many attributes in one # class. class AnalysisResult(Result): @@ -360,7 +364,8 @@ def to_dict(self) -> dict: 'crash_result': self.crash_result.to_dict() if self.crash_result else {}, 'crash_context_result': - self.crash_context_result.to_dict() if self.crash_context_result else {}, + self.crash_context_result.to_dict() + if self.crash_context_result else {}, 'coverage_result': self.coverage_result.to_dict() if self.coverage_result else {}, } @@ -446,7 +451,7 @@ def best_analysis_result(self) -> Optional[AnalysisResult]: if (isinstance(result, AnalysisResult) and result.crashes): if result.crash_context_result and result.crash_context_result.feasible: return result - elif result.crash_result and result.crash_result.true_bug: + if result.crash_result and result.crash_result.true_bug: return result # 2. 
Crashed @@ -620,7 +625,7 @@ def is_semantic_error(self) -> bool: if result: if result.crash_context_result: return not result.crash_context_result.feasible - elif result.crash_result: + if result.crash_result: return not result.crash_result.true_bug return False diff --git a/run_one_experiment.py b/run_one_experiment.py index 3517d6365..13c9f98fa 100644 --- a/run_one_experiment.py +++ b/run_one_experiment.py @@ -22,9 +22,9 @@ from multiprocessing import pool from typing import List, Optional -from agent.context_analyzer import ContextAnalyzer import logger import pipeline +from agent.context_analyzer import ContextAnalyzer from agent.coverage_analyzer import CoverageAnalyzer from agent.crash_analyzer import CrashAnalyzer from agent.enhancer import Enhancer From 8e23fc16a92a13773642492b1688719f453e00d4 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Tue, 8 Jul 2025 14:31:00 +0000 Subject: [PATCH 03/12] Completed text changes in PR review --- agent/base_agent.py | 2 +- agent/context_analyzer.py | 20 ++++--------------- experimental/build_generator/llm_agent.py | 2 +- llm_toolkit/prompt_builder.py | 4 ++-- prompts/agent/context-analyzer-priming.txt | 20 +++++++++---------- .../context-analyzer-response.txt | 2 +- 6 files changed, 19 insertions(+), 31 deletions(-) diff --git a/agent/base_agent.py b/agent/base_agent.py index 29b7a260f..3e27a4c83 100644 --- a/agent/base_agent.py +++ b/agent/base_agent.py @@ -173,7 +173,7 @@ def _container_handle_invalid_tool_usage(self, response: str, prompt: Prompt, extra: str = '') -> Prompt: - """Formats a prompt to re-teach LLM how to use the |tool|.""" + """Formats a prompt to re-teach LLM how to use the |tools|, appended with |extra| information""" logger.warning('ROUND %02d Invalid response from LLM: %s', cur_round, response, diff --git a/agent/context_analyzer.py b/agent/context_analyzer.py index 6dffee55d..34a93df8a 100644 --- a/agent/context_analyzer.py +++ b/agent/context_analyzer.py @@ -41,29 +41,22 @@ def __init__(self, 
name: str = ''): builder = prompt_builder.ContextAnalyzerTemplateBuilder(llm, benchmark) - description = builder.get_description().get() - instruction = builder.get_instruction().get() - tools = [self.get_function_implementation, self.search_project_files] - super().__init__(trial, llm, args, benchmark, description, instruction, tools, name) - self.project_functions = None def write_requirements_to_file(self, args, requirements: str) -> str: - """Write the requirements to a file.""" + """Writes the requirements to a file.""" if not requirements: logger.warning('No requirements to write to file.', trial=self.trial) return '' requirement_path = args.work_dirs.requirements_file_path(self.trial) - with open(requirement_path, 'w') as f: f.write(requirements) - logger.info('Requirements written to %s', requirement_path, trial=self.trial) @@ -152,26 +145,21 @@ def execute(self, # Initialize the ProjectContainerTool for local file search self.inspect_tool = container_tool.ProjectContainerTool(self.benchmark) self.inspect_tool.compile(extra_commands=' && rm -rf /out/* > /dev/null') - - # Call the agent asynchronously and return the result + # Prepare initial prompt for the agent prompt = self._initial_prompt(result_history) - if not prompt or not prompt.get(): logger.error('Failed to build initial prompt for FunctionAnalyzer.', trial=self.trial) return last_result - + # Chat with the LLM asynchronously and return the result while self.round < self.max_round and prompt.get(): - final_response_text = self.chat_llm(self.round, client=None, prompt=prompt, trial=result_history[-1].trial) - prompt = self.handle_llm_response(final_response_text, context_result) - + # Terminate the inspect tool after the analysis is done self.inspect_tool.terminate() - analysis_result = resultslib.AnalysisResult( author=self, run_result=last_result.run_result, diff --git a/experimental/build_generator/llm_agent.py b/experimental/build_generator/llm_agent.py index 1aa23b32d..82f23a33c 100644 --- 
a/experimental/build_generator/llm_agent.py +++ b/experimental/build_generator/llm_agent.py @@ -464,7 +464,7 @@ def _container_handle_invalid_tool_usage(self, response: str, prompt: Prompt, extra: str = '') -> Prompt: - """Formats a prompt to re-teach LLM how to use the |tool|.""" + """Formats a prompt to re-teach LLM how to use the |tools|, appended with |extra| information""" # pylint: disable=unused-argument logger.warning('ROUND %02d Invalid response from LLM: %s', diff --git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py index 3ef289114..4012e07d9 100644 --- a/llm_toolkit/prompt_builder.py +++ b/llm_toolkit/prompt_builder.py @@ -1083,8 +1083,8 @@ def build_context_analysis_prompt(self, self.benchmark.project, self.benchmark.function_signature) return self._prompt - # Add the fuzz driver and crash results - prompt = prompt.replace('{FUZZ_DRIVER}', run_result.fuzz_target_source) + # Add the fuzz target and crash results + prompt = prompt.replace('{FUZZ_TARGET}', run_result.fuzz_target_source) prompt = prompt.replace('{CRASH_ANALYSIS}', crash_result.insight) prompt = prompt.replace('{CRASH_STACKTRACE}', crash_result.stacktrace) diff --git a/prompts/agent/context-analyzer-priming.txt b/prompts/agent/context-analyzer-priming.txt index b88f0357a..f2e802d27 100644 --- a/prompts/agent/context-analyzer-priming.txt +++ b/prompts/agent/context-analyzer-priming.txt @@ -1,11 +1,11 @@ -You are a professional security engineer developing fuzz drivers for the project {PROJECT_NAME}. -The previous fuzz driver you developed caused a crash in the project. +You are a professional security engineer developing fuzz targets for the project {PROJECT_NAME}. +The previous fuzz target you developed caused a crash in the project. However, this crash is NOT feasible and cannot be triggered from the project's external entry points (public functions with no non-test callers). 
-We have provided you with the fuzz driver that caused the crash, its requirements, the crash stacktrace, an analysis of the crash, +We have provided you with the fuzz target that caused the crash, its requirements, the crash stacktrace, an analysis of the crash, and a set of tools that you can use to navigate the project's codebase. Your goal is to deeply analyze the project and the context of the crashing function and provide detailed source code evidence that the crash is NOT feasible from any entry point in the program. -If the crash is indeed not feasible, also provide a recommendation for fixing the fuzz driver. +If the crash is indeed not feasible, also provide a recommendation for fixing the fuzz target. If the crash is feasible, provide source code evidence that shows the sequence of execution from the project's entry point that can cause the crash. @@ -14,7 +14,7 @@ Follow these steps to determine the feasibility of this crash. Step 1: Identify the information you need to analyze the crash and the function's context. This includes: - - The crashing function called by the fuzz driver + - The crashing function called by the fuzz target - The different functions where it is called from - The entry points of the project - The crash stacktrace and analysis @@ -33,14 +33,14 @@ This is the input variable values, global variable values or combination of inpu You can get these states by reviewing: - The crash analysis - The functions identified in the stack trace - - The fuzz driver + - The fuzz target Step 4: Analyze the project and provide proof, with source code evidence, that the crash cannot be triggered from a the project's entry points (public non-test functions). - If the crash is not feasible, this proof should show why the crash conditions cannot be satisfied in the different execution paths. - If the crash is feasible, this proof should show how the crash conditions are satisfied when the crashing function is called from the program's entry points. 
- DO NOT SPECULATE. Provide source code evidence to back up any statement you make about the crash's feasibility. -Step 5: If the crash is not feasible, recommend necessary and minimal modifications to the fuzz driver to prevent the crash. +Step 5: If the crash is not feasible, recommend necessary and minimal modifications to the fuzz target to prevent the crash. - The modifications should mirror how the crashing function is called in different project locations. Step 8: MAKE SURE YOU PROVIDE YOUR RESPONSE IN THE REQUIRED FORMAT BELOW. @@ -48,9 +48,9 @@ Step 8: MAKE SURE YOU PROVIDE YOUR RESPONSE IN THE REQUIRED FORMAT BELOW. Here is the provided data. - -{FUZZ_DRIVER} - + +{FUZZ_TARGET} + {FUNCTION_REQUIREMENTS} diff --git a/prompts/template_xml/context-analyzer-response.txt b/prompts/template_xml/context-analyzer-response.txt index 563e590dc..41d63d0fd 100644 --- a/prompts/template_xml/context-analyzer-response.txt +++ b/prompts/template_xml/context-analyzer-response.txt @@ -10,7 +10,7 @@ Make sure your response follows the following format. 
Your analysis of the feasibility of this crash -Recommendations for modifying the fuzz driver to prevent the crash if the crash is not feasible +Recommendations for modifying the fuzz target to prevent the crash if the crash is not feasible \ No newline at end of file From b8eef8b555c4da33139f86f2c253e19da53c31e7 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Tue, 8 Jul 2025 18:54:50 +0000 Subject: [PATCH 04/12] Added support for context analyzer to report final result using function call --- agent/base_agent.py | 17 ++--- agent/context_analyzer.py | 83 +++++++++++++--------- llm_toolkit/prompt_builder.py | 6 -- prompts/agent/context-analyzer-priming.txt | 11 +-- results.py | 12 ++++ 5 files changed, 79 insertions(+), 50 deletions(-) diff --git a/agent/base_agent.py b/agent/base_agent.py index 3e27a4c83..d65b86d72 100644 --- a/agent/base_agent.py +++ b/agent/base_agent.py @@ -369,7 +369,7 @@ def __init__(self, logger.info('ADK Agent %s created.', self.name, trial=self.trial) def chat_llm(self, cur_round: int, client: Any, prompt: Prompt, - trial: int) -> str: + trial: int) -> Any: """Call the agent with the given prompt, running async code in sync.""" self.round = cur_round @@ -382,7 +382,7 @@ async def _call(): content = types.Content(role='user', parts=[types.Part(text=prompt.get())]) - final_response_text = '' + final_response = None async for event in self.runner.run_async( user_id=user_id, @@ -390,16 +390,17 @@ async def _call(): new_message=content, ): if event.is_final_response(): - if (event.content and event.content.parts and - event.content.parts[0].text): - final_response_text = event.content.parts[0].text + if (event.content and event.content.parts): + if event.content.parts[0].text: + final_response = event.content.parts[0].text + self.log_llm_response(final_response) + elif event.content.parts[0].function_response: + final_response = event.content.parts[0].function_response.response elif event.actions and event.actions.escalate: error_message = 
event.error_message logger.error('Agent escalated: %s', error_message, trial=self.trial) - self.log_llm_response(final_response_text) - - return final_response_text + return final_response return self.llm.with_retry_on_error(lambda: asyncio.run(_call()), [errors.ClientError]) diff --git a/agent/context_analyzer.py b/agent/context_analyzer.py index 34a93df8a..f0094cdc0 100644 --- a/agent/context_analyzer.py +++ b/agent/context_analyzer.py @@ -25,8 +25,10 @@ from data_prep import introspector from experiment import benchmark as benchmarklib from experiment.workdir import WorkDirs +from google.adk.tools import ToolContext from llm_toolkit import models, prompt_builder, prompts from tool import container_tool +from typing import Any class ContextAnalyzer(base_agent.ADKBaseAgent): @@ -43,7 +45,7 @@ def __init__(self, builder = prompt_builder.ContextAnalyzerTemplateBuilder(llm, benchmark) description = builder.get_description().get() instruction = builder.get_instruction().get() - tools = [self.get_function_implementation, self.search_project_files] + tools = [self.get_function_implementation, self.search_project_files, self.report_final_result] super().__init__(trial, llm, args, benchmark, description, instruction, tools, name) self.project_functions = None @@ -91,38 +93,17 @@ def validate_llm_response(self, response: str) -> bool: return True - def handle_llm_response( - self, final_response_text: str, - result: resultslib.CrashContextResult) -> prompts.Prompt: + def handle_invalid_llm_response( + self, final_response_text: str) -> prompts.Prompt: """Handle the LLM response and update the result.""" - feasible = self._parse_tag(final_response_text, 'feasible') - analysis = self._parse_tag(final_response_text, 'analysis') - template_builder = prompt_builder.ContextAnalyzerTemplateBuilder( self.llm, self.benchmark) - if not self.validate_llm_response(final_response_text): - return self._container_handle_invalid_tool_usage( - [self.inspect_tool], self.round, 
final_response_text, - template_builder.build(), template_builder.get_response_format()) - - if feasible == 'False': - result.feasible = False - elif feasible == 'True': - result.feasible = True - else: - logger.error('Unexpected conclusion from LLM response: %s.', - feasible, - trial=self.trial) - - result.analysis = analysis + return self._container_handle_invalid_tool_usage( + [self.inspect_tool], self.round, final_response_text, + template_builder.build(), template_builder.get_response_format()) - recommendations = self._parse_tag(final_response_text, 'recommendations') - if recommendations: - result.recommendations = recommendations.strip() - - return template_builder.build() def execute(self, result_history: list[resultslib.Result]) -> resultslib.Result: @@ -140,7 +121,7 @@ def execute(self, trial=self.trial) return last_result - context_result = resultslib.CrashContextResult() + context_result = None # Initialize the ProjectContainerTool for local file search self.inspect_tool = container_tool.ProjectContainerTool(self.benchmark) @@ -152,12 +133,19 @@ def execute(self, trial=self.trial) return last_result # Chat with the LLM asynchronously and return the result - while self.round < self.max_round and prompt.get(): - final_response_text = self.chat_llm(self.round, + while self.round < self.max_round: + final_response = self.chat_llm(self.round, client=None, prompt=prompt, trial=result_history[-1].trial) - prompt = self.handle_llm_response(final_response_text, context_result) + context_result = resultslib.CrashContextResult.from_dict(final_response) + if context_result: + break + logger.error('Failed to parse LLM response into CrashContextResult.', + trial=self.trial) + prompt = self.handle_invalid_llm_response(final_response) + + # Terminate the inspect tool after the analysis is done self.inspect_tool.terminate() analysis_result = resultslib.AnalysisResult( @@ -165,7 +153,7 @@ def execute(self, run_result=last_result.run_result, 
crash_result=last_result.crash_result, crash_context_result=context_result, - chat_history={self.name: last_result.crash_result.to_dict()}) + chat_history={}) return analysis_result @@ -291,3 +279,34 @@ def get_function_implementation(self, project_name: str, self.log_llm_prompt(response) return response + + def report_final_result(self, feasible: bool, analysis: str, recommendations: str, tool_context: ToolContext) -> dict: + """ + Provide final result, including the crash feasibility, detailed analysis, and any recommendations. + + Args: + feasible (bool): True if the crash is feasible, False otherwise. + analysis (str): Detailed analysis and source code evidence showing why the crash is or is not feasible. + recommendations (str): Recommendations for modifying the fuzz target to prevent the crash. If the crash is feasible, this should be empty. + + Returns: + This function will not return anything to the LLM. + """ + response = f""" + + \n{feasible}\n + \n{analysis}\n + \n{recommendations}\n + + """ + self.log_llm_response(response) + crash_context_result = resultslib.CrashContextResult( + feasible=feasible, + analysis=analysis, + recommendations=recommendations + ) + + # We have received final result. Instruct the agent to terminate execution. 
+ # tool_context._invocation_context.end_invocation = True + tool_context.actions.skip_summarization = True + return crash_context_result.to_dict() diff --git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py index 4012e07d9..2b8b0cd1f 100644 --- a/llm_toolkit/prompt_builder.py +++ b/llm_toolkit/prompt_builder.py @@ -1090,12 +1090,6 @@ def build_context_analysis_prompt(self, # Add the function requirements prompt = prompt.replace('{FUNCTION_REQUIREMENTS}', function_requirements) - - response_format = self._get_template(self.context_analyzer_response_file) - - if response_format: - prompt = prompt.replace('{RESPONSE_FORMAT}', response_format) - self._prompt.append(prompt) self._prompt.append(tool_guides) diff --git a/prompts/agent/context-analyzer-priming.txt b/prompts/agent/context-analyzer-priming.txt index f2e802d27..3ff5e63d0 100644 --- a/prompts/agent/context-analyzer-priming.txt +++ b/prompts/agent/context-analyzer-priming.txt @@ -43,7 +43,11 @@ Step 4: Analyze the project and provide proof, with source code evidence, that t Step 5: If the crash is not feasible, recommend necessary and minimal modifications to the fuzz target to prevent the crash. - The modifications should mirror how the crashing function is called in different project locations. -Step 8: MAKE SURE YOU PROVIDE YOUR RESPONSE IN THE REQUIRED FORMAT BELOW. +Step 8: MAKE SURE YOU PROVIDE YOUR RESPONSE USING THE report_final_result TOOL. + - The tool takes three arguments - feasible, analysis and recommendations. + - feasible should be True if the crash is feasible, or False otherwise. + - analysis should contain source code evidence of the crash's feasibility. + - recommendations should only be provided for non-feasible crashes, and should recommend fuzz target modifications to prevent this crash. Here is the provided data. @@ -64,9 +68,8 @@ Here is the provided data. {CRASH_ANALYSIS} -{RESPONSE_FORMAT} - You will be provided with the following tools. 1. 
get_function_implementation: This is a tool you can use to retrieve the implementation of a function using the project's name and function's name as arguments. 2. search_project_files: This is an interactive tool you can use to search the project's source file using bash commands and find definitions or usages of functions, classes, structs, and variables. - The usage guide for the Bash Tool is provided below. \ No newline at end of file + The usage guide for the Bash Tool is provided below. +3. report_final_result: This is a tool you must use to report your final result. Once this tool is called, no further instructions will be provided to you. \ No newline at end of file diff --git a/results.py b/results.py index 84ec9ebd3..1039907f5 100644 --- a/results.py +++ b/results.py @@ -327,6 +327,18 @@ def to_dict(self) -> dict: 'recommendations': self.recommendations, } + @staticmethod + def from_dict(data: Any) -> Optional['CrashContextResult']: + """Creates a CrashContextResult from a dictionary.""" + + if not isinstance(data, dict) or 'feasible' not in data or 'analysis' not in data: + return None + + return CrashContextResult( + feasible=data.get('feasible', False), + analysis=data.get('analysis', ''), + recommendations=data.get('recommendations', '')) + # TODO: Make this class an attribute of Result, avoid too many attributes in one # class. 
From b3087e01d61b805e0ef8c48b13aee98abc006449 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Tue, 8 Jul 2025 21:53:12 +0000 Subject: [PATCH 05/12] Presubmit fixes --- agent/base_agent.py | 8 +++- agent/context_analyzer.py | 80 ++++++++++++----------------------- llm_toolkit/prompt_builder.py | 3 +- results.py | 10 ++--- 4 files changed, 41 insertions(+), 60 deletions(-) diff --git a/agent/base_agent.py b/agent/base_agent.py index d65b86d72..f4060b075 100644 --- a/agent/base_agent.py +++ b/agent/base_agent.py @@ -25,6 +25,7 @@ import requests from google.adk import agents, runners, sessions +from google.adk.tools import ToolContext from google.genai import errors, types import logger @@ -173,7 +174,8 @@ def _container_handle_invalid_tool_usage(self, response: str, prompt: Prompt, extra: str = '') -> Prompt: - """Formats a prompt to re-teach LLM how to use the |tools|, appended with |extra| information""" + """Formats a prompt to re-teach LLM how to use the |tools|, + appended with |extra| information""" logger.warning('ROUND %02d Invalid response from LLM: %s', cur_round, response, @@ -420,6 +422,10 @@ def log_llm_response(self, response: str) -> None: self.round, trial=self.trial) + def end_llm_chat(self, tool_context: ToolContext) -> None: + """Ends the LLM chat session.""" + tool_context.actions.skip_summarization = True + if __name__ == "__main__": # For cloud experiments. 
diff --git a/agent/context_analyzer.py b/agent/context_analyzer.py index f0094cdc0..79a27404f 100644 --- a/agent/context_analyzer.py +++ b/agent/context_analyzer.py @@ -19,16 +19,16 @@ import argparse +from google.adk.tools import ToolContext + import logger import results as resultslib from agent import base_agent from data_prep import introspector from experiment import benchmark as benchmarklib from experiment.workdir import WorkDirs -from google.adk.tools import ToolContext from llm_toolkit import models, prompt_builder, prompts from tool import container_tool -from typing import Any class ContextAnalyzer(base_agent.ADKBaseAgent): @@ -45,7 +45,10 @@ def __init__(self, builder = prompt_builder.ContextAnalyzerTemplateBuilder(llm, benchmark) description = builder.get_description().get() instruction = builder.get_instruction().get() - tools = [self.get_function_implementation, self.search_project_files, self.report_final_result] + tools = [ + self.get_function_implementation, self.search_project_files, + self.report_final_result + ] super().__init__(trial, llm, args, benchmark, description, instruction, tools, name) self.project_functions = None @@ -65,36 +68,8 @@ def write_requirements_to_file(self, args, requirements: str) -> str: return requirement_path - def validate_llm_response(self, response: str) -> bool: - """Validate the LLM response for the required tags.""" - if not response: - logger.error('LLM response is empty.', trial=self.trial) - return False - - feasible = self._parse_tag(response, 'feasible') - analysis = self._parse_tag(response, 'analysis') - recommendations = self._parse_tag(response, 'recommendations') - - if not feasible or feasible.strip() not in ['False', 'True']: - logger.error('LLM response does not contain a valid "feasible" tag.', - trial=self.trial) - return False - - if not analysis: - logger.error('LLM response does not contain an "analysis" tag.', - trial=self.trial) - return False - - if feasible.strip() == 'False' and not 
recommendations: - logger.error( - 'LLM response does not provide any recommendation for a non-feasible crash.', - trial=self.trial) - return False - - return True - - def handle_invalid_llm_response( - self, final_response_text: str) -> prompts.Prompt: + def handle_invalid_llm_response(self, + final_response_text: str) -> prompts.Prompt: """Handle the LLM response and update the result.""" template_builder = prompt_builder.ContextAnalyzerTemplateBuilder( @@ -104,7 +79,6 @@ def handle_invalid_llm_response( [self.inspect_tool], self.round, final_response_text, template_builder.build(), template_builder.get_response_format()) - def execute(self, result_history: list[resultslib.Result]) -> resultslib.Result: """Execute the agent with the given results.""" @@ -116,9 +90,9 @@ def execute(self, if not isinstance( last_result, resultslib.AnalysisResult) or not last_result.crash_result: - logger.error( - f'Expected last result to be AnalysisResult, got {type(last_result)}.', - trial=self.trial) + logger.error(f'Expected last result to be AnalysisResult, got %s.', + type(last_result), + trial=self.trial) return last_result context_result = None @@ -135,17 +109,16 @@ def execute(self, # Chat with the LLM asynchronously and return the result while self.round < self.max_round: final_response = self.chat_llm(self.round, - client=None, - prompt=prompt, - trial=result_history[-1].trial) + client=None, + prompt=prompt, + trial=result_history[-1].trial) context_result = resultslib.CrashContextResult.from_dict(final_response) if context_result: break logger.error('Failed to parse LLM response into CrashContextResult.', - trial=self.trial) + trial=self.trial) prompt = self.handle_invalid_llm_response(final_response) - # Terminate the inspect tool after the analysis is done self.inspect_tool.terminate() analysis_result = resultslib.AnalysisResult( @@ -280,33 +253,34 @@ def get_function_implementation(self, project_name: str, return response - def report_final_result(self, feasible: 
bool, analysis: str, recommendations: str, tool_context: ToolContext) -> dict: + def report_final_result(self, feasible: bool, analysis: str, + recommendations: str, + tool_context: ToolContext) -> dict: """ - Provide final result, including the crash feasibility, detailed analysis, and any recommendations. + Provide final result, including the crash feasibility, + detailed analysis, and any recommendations. Args: feasible (bool): True if the crash is feasible, False otherwise. - analysis (str): Detailed analysis and source code evidence showing why the crash is or is not feasible. - recommendations (str): Recommendations for modifying the fuzz target to prevent the crash. If the crash is feasible, this should be empty. + analysis (str): Detailed analysis and source code evidence showing + why the crash is or is not feasible. + recommendations (str): Recommendations for modifying the fuzz target to + prevent the crash. If the crash is feasible, + this should be empty. Returns: This function will not return anything to the LLM. """ response = f""" - \n{feasible}\n \n{analysis}\n \n{recommendations}\n - """ self.log_llm_response(response) crash_context_result = resultslib.CrashContextResult( - feasible=feasible, - analysis=analysis, - recommendations=recommendations - ) + feasible=feasible, analysis=analysis, recommendations=recommendations) # We have received final result. Instruct the agent to terminate execution. 
# tool_context._invocation_context.end_invocation = True - tool_context.actions.skip_summarization = True + self.end_llm_chat(tool_context) return crash_context_result.to_dict() diff --git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py index 2b8b0cd1f..b919b0237 100644 --- a/llm_toolkit/prompt_builder.py +++ b/llm_toolkit/prompt_builder.py @@ -1109,7 +1109,8 @@ def build(self, function_signature: str = '') -> prompts.Prompt: """Returns an empty prompt.""" - del (example_pair, project_example_content, project_context_content) + del (example_pair, project_example_content, project_context_content, + tool_guides, project_dir, project_name, function_signature) return self._prompt diff --git a/results.py b/results.py index 1039907f5..cfdf9b11f 100644 --- a/results.py +++ b/results.py @@ -331,13 +331,13 @@ def to_dict(self) -> dict: def from_dict(data: Any) -> Optional['CrashContextResult']: """Creates a CrashContextResult from a dictionary.""" - if not isinstance(data, dict) or 'feasible' not in data or 'analysis' not in data: + if not isinstance(data, + dict) or 'feasible' not in data or 'analysis' not in data: return None - return CrashContextResult( - feasible=data.get('feasible', False), - analysis=data.get('analysis', ''), - recommendations=data.get('recommendations', '')) + return CrashContextResult(feasible=data.get('feasible', False), + analysis=data.get('analysis', ''), + recommendations=data.get('recommendations', '')) # TODO: Make this class an attribute of Result, avoid too many attributes in one From b9de63c736e5a3af04be1bc2eb811f1826192b06 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Tue, 8 Jul 2025 22:00:32 +0000 Subject: [PATCH 06/12] Removed unneeded function --- agent/context_analyzer.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/agent/context_analyzer.py b/agent/context_analyzer.py index 79a27404f..2d7beb6dd 100644 --- a/agent/context_analyzer.py +++ b/agent/context_analyzer.py @@ -53,21 +53,6 @@ def 
__init__(self, tools, name) self.project_functions = None - def write_requirements_to_file(self, args, requirements: str) -> str: - """Writes the requirements to a file.""" - if not requirements: - logger.warning('No requirements to write to file.', trial=self.trial) - return '' - - requirement_path = args.work_dirs.requirements_file_path(self.trial) - with open(requirement_path, 'w') as f: - f.write(requirements) - logger.info('Requirements written to %s', - requirement_path, - trial=self.trial) - - return requirement_path - def handle_invalid_llm_response(self, final_response_text: str) -> prompts.Prompt: """Handle the LLM response and update the result.""" From c0119d4628772afde078437846d96b5b03051bad Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Tue, 8 Jul 2025 22:35:39 +0000 Subject: [PATCH 07/12] Integrated context analyzer result in enhancer. --- agent/enhancer.py | 4 +- llm_toolkit/prompt_builder.py | 51 ++++----- prompts/agent/enhancer-crash-priming.txt | 129 +++++++++++++++++++++++ 3 files changed, 153 insertions(+), 31 deletions(-) create mode 100644 prompts/agent/enhancer-crash-priming.txt diff --git a/agent/enhancer.py b/agent/enhancer.py index bba47c61e..322c5652f 100644 --- a/agent/enhancer.py +++ b/agent/enhancer.py @@ -64,10 +64,10 @@ def _initial_prompt(self, results: list[Result]) -> Prompt: last_build_result, error_desc, errors) elif last_result.crash_result: crash_result = last_result.crash_result + context_result = last_result.crash_context_result builder = CrashEnhancerTemplateBuilder(self.llm, benchmark, last_build_result, - crash_result.insight, - crash_result.stacktrace) + crash_result, context_result) elif last_result.coverage_result: builder = CoverageEnhancerTemplateBuilder( self.llm, diff --git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py index b919b0237..fa4221b71 100644 --- a/llm_toolkit/prompt_builder.py +++ b/llm_toolkit/prompt_builder.py @@ -28,7 +28,7 @@ from experiment.benchmark import Benchmark, 
FileType from experiment.fuzz_target_error import SemanticCheckResult from llm_toolkit import models, prompts -from results import AnalysisResult, BuildResult, CoverageResult, RunResult +from results import AnalysisResult, BuildResult, CoverageResult, CrashContextResult, CrashResult, RunResult logger = logging.getLogger(__name__) @@ -75,6 +75,7 @@ BUILD_ERROR_SUMMARY = 'The code has the following build issues:' FUZZ_ERROR_SUMMARY = 'The code can build successfully but has a runtime issue: ' +FIX_RECOMMENDATION_HEADER = 'Here are some fix suggestions you can apply.\n' C_PROMPT_HEADERS_TO_ALWAYS_INCLUDES = ['stdio.h', 'stdlib.h', 'stdint.h'] @@ -786,17 +787,17 @@ def __init__(self, model: models.LLM, benchmark: Benchmark, build_result: BuildResult, - insight: str = '', - stacktrace: str = '', + crash_result: CrashResult, + context_result: Optional[CrashContextResult], template_dir: str = DEFAULT_TEMPLATE_DIR, initial: Any = None): super().__init__(model, benchmark, template_dir, initial) # Load templates. self.priming_template_file = self._find_template(self.agent_templare_dir, - 'enhancer-priming.txt') + 'enhancer-crash-priming.txt') self.build_result = build_result - self.insight = insight - self.stacktrace = stacktrace + self.crash_result = crash_result + self.context_result = context_result def build(self, example_pair: list[list[str]], @@ -816,34 +817,26 @@ def build(self, self.benchmark.function_signature) priming = priming.replace('{PROJECT_DIR}', project_dir) priming = priming.replace('{TOOL_GUIDES}', tool_guides) - if self.build_result.build_script_source: - build_text = (f'\n{self.build_result.build_script_source}\n' - '') - else: - build_text = 'Build script reuses `/src/build.bk.sh`.' - priming = priming.replace('{BUILD_TEXT}', build_text) - priming_weight = self._model.estimate_token_num(priming) - # TODO(dongge): Refine this logic. 
- error_desc = f""" - Here is the insight from the crash analyzer: - {self.insight} + priming = priming.replace('{FUZZ_TARGET_SOURCE}', + self.build_result.fuzz_target_source) + priming = priming.replace('{CRASH_STACKTRACE}', self.crash_result.stacktrace) + priming = priming.replace('{CRASH_ANALYZER_INSIGHT}', + self.crash_result.insight) - Below is crash report: - {self.stacktrace} - """ - errors = [] - problem = self._format_fixer_problem(self.build_result.fuzz_target_source, - error_desc, errors, priming_weight, '', - '') + if self.context_result: + priming = priming.replace('CONTEXT_ANALYZER_INSIGHT', + self.context_result.analysis) + fix_recommendations = FIX_RECOMMENDATION_HEADER + self.context_result.recommendations + priming = priming.replace('FIX_RECOMMENDATION', + fix_recommendations) - # TODO(pamusuo): Refactor this logic before merging if function_requirements: - requirements = (f'\nHere are the requirements for the function.\n' - f'{function_requirements}\n') - problem += requirements + priming = priming.replace('{FUNCTION_REQUIREMENTS}', + function_requirements) + + self._prompt.append(priming) - self._prepare_prompt(priming, problem) return self._prompt diff --git a/prompts/agent/enhancer-crash-priming.txt b/prompts/agent/enhancer-crash-priming.txt new file mode 100644 index 000000000..95a8aec22 --- /dev/null +++ b/prompts/agent/enhancer-crash-priming.txt @@ -0,0 +1,129 @@ + +As a security testing engineer, you must refine the following compilable {LANGUAGE} fuzz target to make it more suitable for fuzzing function {FUNCTION_SIGNATURE}, based on its current runtime errors description. +Objective: Your task is to understand the runtime error, and refine the fuzz target (and build script if needed) accordingly. Note that the fuzz target can already compile. + + + +Follow these steps to refine the fuzz target: + +Step 1. Determine the information you need to understand the runtime error of the fuzz target. 
+This includes: +* The existing compilable fuzz target provided below. +* The existing build script provided below. +* **Source code** of the function under test. +* **Custom Types and Dependencies** definitions and implementations. +* **Initialization and setup** requirements and steps. +* **Build details** and integration steps. +* Valid and edge-case input values. +* Environmental and runtime dependencies. + +Step 2. Collect information using the Bash tool. +Use the bash tool (see section) and follow its rules to gather the necessary information. You can collect information from: +* The project source code directory `{PROJECT_DIR}/` cloned from the project repository. +* Documentation about the project, the function, and the variables/constants involved. +* Environment variables. +* Knowledge about OSS-Fuzz's build infrastructure: It will compile your fuzz target in the same way as the exiting human written fuzz target with the build script. + +Step 3. Analyze the function and its parameters. +Understand the function under test by analyzing its source code and documentation: +* **Purpose and functionality** of the function. +* **Input processing** and internal logic. +* **Dependencies** on other functions or global variables. +* **Error handling** and edge cases. + +Step 4. Understand initialization requirements. +Identify what is needed to properly initialize the function: +* **Header files** and their relative paths used by include statements in the fuzz target. +* **Complex input parameters or objects** initialization. +* **Constructor functions** or initialization routines. +* **Global state** or configuration needs to be set up. +* **Mocking** external dependencies if necessary. + +Step 5. Understand Constraints and edge cases. +For each input parameter, understand: +* Valid ranges and data types. +* Invalid or edge-case values (e.g., zero, NULL, predefined constants, maximum values). +* Special values that trigger different code paths. 
+ +Step 6: Plan Fuzz Target Implementation. +Decide how to implement the refined fuzz target: +* The fuzz target can compile so your can reuse most of the code as a scaffold. +* Only modify the parts caused the runtime error, no more no less. +* Prepare to output the FULL new fuzz target, do not leave out any code that is the same as before. +* **Extract parameters** from the `data` and `size` variable of `LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)`. +* Handle fixed-size versus variable-size data. +* **Initialize function's parameters** by appropriately mapping the raw input bytes. +* Ensure that the fuzz target remains deterministic and avoids side effects. +* Avoid `goto` statements. + +* +Step 7 (Optional): **Modify** the Build Script. +Modify the build script only if the existing one in this prompt is insufficient: +* Decide if you need to modify the build script to successfully build the refined fuzz target. +* If the build script needs to be modified, prepare to output the FULL new build script, do not leave out any code that is the same as before. +* Leave it empty if no modification is needed. + +Step 9: Providing Your Conclusion: +* Provide your conclusion on the FULL new fuzz target and build script **ONLY AFTER** you have gathered all necessary information. +* **DO NOT SEND** any other content (e.g., bash tool commands) in the conclusion message. ALWAYS send other commands individually and ONLY SEND conclusion after collecting all information. +* Conclusion Format: +* Overall Description: +* Summarize the error, the root cause your found, and describe your fuzz target refinement. +* Wrap this summary within and tags. +* Modified Fuzz Target: +* Provide the full code of the refined fuzz target. +* Wrap the code within and tags. +* Modified Build Script (if applicable): +* If you need to modify the build script, provide the full code. +* Wrap it within and tags. 
+* Format Example: + +The fuzz target has runtime error ___, which is caused by ___. +I will refined it by ___. +Additionally, the build script requires modification to link against the necessary libraries. + + +[Your FULL fuzz target code here, do not omit existing code] + + +[Your FULL build script code here, do not omit existing code.] + + + + +{TOOL_GUIDES} + +Below is the original fuzz target which compiles but crashes: + +{FUZZ_TARGET_SOURCE} + + +Below is the error to fix: +The code can build successfully but has the following crash: + +{CRASH_STACKTRACE} + + +Here is a detailed analysis about the crash + +{CRASH_ANALYZER_INSIGHT} + + +{CONTEXT_ANALYZER_INSIGHT} + + + +{FIX_RECOMMENDATION} + + +Here are the requirements for the target function. + +{FUNCTION_REQUIREMENTS} + + +Fix code: +1. Consider possible solutions for the issues listed above. +2. Choose a solution that can maximize fuzzing result, which is utilizing the function under test and feeding it not null input. +3. Apply the solutions to the original code. +It's important to show the complete code, not only the fixed line. 
+ From a0b57176629d5f08d62b74bf7986f32498c88e34 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Tue, 8 Jul 2025 22:39:32 +0000 Subject: [PATCH 08/12] Presubmit fixes --- agent/enhancer.py | 4 ++-- llm_toolkit/prompt_builder.py | 13 +++++++------ 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/agent/enhancer.py b/agent/enhancer.py index 322c5652f..0f669a216 100644 --- a/agent/enhancer.py +++ b/agent/enhancer.py @@ -66,8 +66,8 @@ def _initial_prompt(self, results: list[Result]) -> Prompt: crash_result = last_result.crash_result context_result = last_result.crash_context_result builder = CrashEnhancerTemplateBuilder(self.llm, benchmark, - last_build_result, - crash_result, context_result) + last_build_result, crash_result, + context_result) elif last_result.coverage_result: builder = CoverageEnhancerTemplateBuilder( self.llm, diff --git a/llm_toolkit/prompt_builder.py b/llm_toolkit/prompt_builder.py index fa4221b71..ddb801a54 100644 --- a/llm_toolkit/prompt_builder.py +++ b/llm_toolkit/prompt_builder.py @@ -28,7 +28,8 @@ from experiment.benchmark import Benchmark, FileType from experiment.fuzz_target_error import SemanticCheckResult from llm_toolkit import models, prompts -from results import AnalysisResult, BuildResult, CoverageResult, CrashContextResult, CrashResult, RunResult +from results import (AnalysisResult, BuildResult, CoverageResult, + CrashContextResult, CrashResult, RunResult) logger = logging.getLogger(__name__) @@ -793,8 +794,8 @@ def __init__(self, initial: Any = None): super().__init__(model, benchmark, template_dir, initial) # Load templates. 
- self.priming_template_file = self._find_template(self.agent_templare_dir, - 'enhancer-crash-priming.txt') + self.priming_template_file = self._find_template( + self.agent_templare_dir, 'enhancer-crash-priming.txt') self.build_result = build_result self.crash_result = crash_result self.context_result = context_result @@ -820,7 +821,8 @@ def build(self, priming = priming.replace('{FUZZ_TARGET_SOURCE}', self.build_result.fuzz_target_source) - priming = priming.replace('{CRASH_STACKTRACE}', self.crash_result.stacktrace) + priming = priming.replace('{CRASH_STACKTRACE}', + self.crash_result.stacktrace) priming = priming.replace('{CRASH_ANALYZER_INSIGHT}', self.crash_result.insight) @@ -828,8 +830,7 @@ def build(self, priming = priming.replace('CONTEXT_ANALYZER_INSIGHT', self.context_result.analysis) fix_recommendations = FIX_RECOMMENDATION_HEADER + self.context_result.recommendations - priming = priming.replace('FIX_RECOMMENDATION', - fix_recommendations) + priming = priming.replace('FIX_RECOMMENDATION', fix_recommendations) if function_requirements: priming = priming.replace('{FUNCTION_REQUIREMENTS}', From b34c3f2779e38a490bc6b095ff9a9243037188be Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Tue, 8 Jul 2025 22:52:06 +0000 Subject: [PATCH 09/12] Added a test for context_analyzer_agent. --- agent_tests/context_analyzer_test.py | 328 +++++++++++++++++++++++++++ 1 file changed, 328 insertions(+) create mode 100644 agent_tests/context_analyzer_test.py diff --git a/agent_tests/context_analyzer_test.py b/agent_tests/context_analyzer_test.py new file mode 100644 index 000000000..fc33c92b7 --- /dev/null +++ b/agent_tests/context_analyzer_test.py @@ -0,0 +1,328 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""A test for the function analyzer agent.""" + +import argparse +import json +from data_prep import introspector +import logger +import os +from typing import List + +from results import AnalysisResult, CrashResult, RunResult +import run_all_experiments +from agent import context_analyzer +from experiment import benchmark as benchmarklib +from experiment import workdir +from llm_toolkit import models +from datetime import datetime +import traceback + +import run_one_experiment + +RESULTS_DIR = f'./results-{datetime.now().strftime("%Y-%m-%d-%H-%M")}' + +NUM_ANA = int(os.getenv('LLM_NUM_ANA', '2')) + + +def parse_args() -> argparse.Namespace: + """Parses command line arguments.""" + parser = argparse.ArgumentParser( + description='Evaluate the function analyzer agent.') + + parser.add_argument('-y', + '--benchmark-yaml', + type=str, + help='A benchmark YAML file.') + + parser.add_argument('-b', + '--benchmarks-directory', + type=str, + help='A directory containing benchmark YAML files.') + + parser.add_argument( + '-g', + '--generate-benchmarks', + help=('Generate benchmarks and use those for analysis. This is a string ' + 'of comma-separated heuristics to use when identifying benchmark ' + 'targets.'), + type=str) + + parser.add_argument('-mr', + '--max-round', + type=int, + default=100, + help='Max trial round for agents.') + + parser.add_argument('-e', + '--introspector-endpoint', + type=str, + default=introspector.DEFAULT_INTROSPECTOR_ENDPOINT) + + + parser.add_argument( + '-of', + '--oss-fuzz-dir', + help='OSS-Fuzz dir path to use. 
Create temporary directory by default.', + default='') + + parser.add_argument('-w', '--work-dir', default=RESULTS_DIR) + + parsed_args = parser.parse_args() + + return parsed_args + +def get_mock_last_result(args, benchmark: benchmarklib.Benchmark) -> AnalysisResult: + + stacktrace = """ +AddressSanitizer: SEGV on unknown address 0x000000000000 (pc 0x557d26695151 bp 0x7ffe468518b0 sp 0x7ffe46851860 T0) +==17==The signal is caused by a READ memory access. +==17==Hint: address points to the zero page. +SCARINESS: 10 (null-deref) +#0 0x557d26695151 in toc_header /src/hoextdown/src/html.c:987 +#1 0x557d266770a0 in parse_atxheader /src/hoextdown/src/document.c:2740:3 +#2 0x557d266770a0 in parse_block /src/hoextdown/src/document.c:3558:11 +#3 0x557d26675c36 in hoedown_document_render /src/hoextdown/src/document.c:4162:3 +#4 0x557d2666e8ef in LLVMFuzzerTestOneInput /src/hoextdown/test/hoedown_fuzzer.c:78:3 +#5 0x557d26523300 in fuzzer::Fuzzer::ExecuteCallback(unsigned char const*, unsigned long) /src/llvm-project/compiler-rt/lib/fuzzer/FuzzerLoop.cpp:614:13 +#6 0x557d26522b25 in fuzzer::Fuzzer::RunOne(unsigned char const*, unsigned long, bool, fuzzer::InputInfo*, bool, bool*) /src/llvm-project/compiler-rt/lib/fuzzer/FuzzerLoop.cpp:516:7 +#7 0x557d26524305 in fuzzer::Fuzzer::MutateAndTestOne() /src/llvm-project/compiler-rt/lib/fuzzer/FuzzerLoop.cpp:760:19 +#8 0x557d26525095 in fuzzer::Fuzzer::Loop(std::__Fuzzer::vector>&) /src/llvm-project/compiler-rt/lib/fuzzer/FuzzerLoop.cpp:905:5 +#9 0x557d26513edb in fuzzer::FuzzerDriver(int*, char***, int (*)(unsigned char const*, unsigned long)) /src/llvm-project/compiler-rt/lib/fuzzer/FuzzerDriver.cpp:914:6 +#10 0x557d2653f2b2 in main /src/llvm-project/compiler-rt/lib/fuzzer/FuzzerMain.cpp:20:10 +#11 0x7f964403b082 in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x24082) (BuildId: 5792732f783158c66fb4f3756458ca24e46e827d) +#12 0x557d2650675d in _start (out/libfuzzer-address-x86_64/hoedown_fuzzer+0x5575d) + +DEDUP_TOKEN: 
toc_header--parse_atxheader--parse_block +AddressSanitizer can not provide additional info. + """ + + insight = """ +The crash is caused by a null pointer dereference in the `toc_header` function in `/src/hoextdown/src/html.c`. + +Specifically, at line 987: +`rndr_header_id(ob, content->data, content->size, 1, data);` + +The `content` pointer is dereferenced without a prior null check. A null check for `content` exists at line 991, but this is after the pointer has already been dereferenced at line 987, which is the source of the bug. + +The fix is to move the null check for `content` to before the dereference. The `if (content)` block starting at line 991 should be moved to enclose the code that uses `content`, including the call to `rndr_header_id` at line 987. + +```c +// src/html.c:985 +} else { +if (content) { // Add this check +hoedown_buffer_puts(ob, "data, content->size, 1, data); +hoedown_buffer_puts(ob, "\">"); +} +} + +if (content) { +hoedown_buffer_put(ob, content->data, content->size); +} +HOEDOWN_BUFPUTSL(ob, "\n"); +``` + +The corrected logic should look something like this: + +```c +// Potential patch +} else { +if (content) { +hoedown_buffer_puts(ob, "data, content->size, 1, data); +hoedown_buffer_puts(ob, "\">"); +} +} + +if (content) { +hoedown_buffer_put(ob, content->data, content->size); +} +if (content) { // This check should wrap the closing tag as well +HOEDOWN_BUFPUTSL(ob, "\n"); +} +``` +A more robust fix would be to ensure that if `content` is NULL, the `` tag is not opened at all. + +The bug is in the project code, not the fuzzer driver. The fuzzer correctly identified a valid crash. 
+ """ + + + fuzz_target_source = """ +#include +#include +#include +#include +#include +#include + +#include "context_test.h" +#include "/src/hoextdown/src/document.h" +#include "html.h" + +#define DEF_OUNIT 64 +#define DEF_MAX_NESTING 16 + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { +// Need at least 8 bytes for config: +// 4: extensions, 1: attr_activation, 2: html_flags, 1: nesting_level/renderer_choice +if (size < 8) { +return 0; +} + +/* Use the first bytes of input to control flags and options */ +hoedown_extensions extensions = *(const uint32_t*)data; +data += 4; +size -= 4; + +uint8_t attr_activation = data[0]; +data += 1; +size -= 1; + +// Use 2 bytes for HTML flags to cover all enum values. +hoedown_html_flags html_flags = *(const uint16_t*)data; +data += 2; +size -= 2; + +// Use 1 byte to control nesting level and renderer choice. +uint8_t fuzzer_choice = data[0]; +data += 1; +size -= 1; + +int nesting_level = fuzzer_choice % 16; // Limit nesting level +int renderer_type = fuzzer_choice / 16; // Use upper bits to choose renderer + +hoedown_renderer *renderer = NULL; +void (*renderer_free)(hoedown_renderer *); + +/* Let the fuzzer choose between the standard and TOC renderer */ +if (renderer_type % 2 == 0) { +renderer = hoedown_html_renderer_new(html_flags, nesting_level); +} else { +renderer = hoedown_html_toc_renderer_new(nesting_level); +} +renderer_free = hoedown_html_renderer_free; + +if (!renderer) { +return 0; +} + +/* Perform Markdown rendering */ +hoedown_buffer *ob = hoedown_buffer_new(DEF_OUNIT); +hoedown_buffer *meta = hoedown_buffer_new(DEF_OUNIT); +hoedown_document *document = hoedown_document_new( +renderer, extensions, DEF_MAX_NESTING, attr_activation, NULL, meta); + +if (!document) { +renderer_free(renderer); +hoedown_buffer_free(ob); +hoedown_buffer_free(meta); +return 0; +} + +/* +* Call hoedown_document_render instead of hoedown_document_render_inline. 
+* This function processes the input as a full Markdown document, including +* block-level elements like tables, lists, headers, and code blocks, which +* will significantly increase coverage. +*/ +hoedown_document_render(document, ob, data, size); + +/* Cleanup */ +hoedown_document_free(document); +renderer_free(renderer); +hoedown_buffer_free(ob); +hoedown_buffer_free(meta); + +return 0; +} + """ + + run_result = RunResult( + benchmark=benchmark, + trial=1, + work_dirs=args.work_dirs, + author=None, + chat_history={}, + crashes=True, + fuzz_target_source=fuzz_target_source + ) + + + crash_result = CrashResult(benchmark=benchmark, + trial=1, + work_dirs=args.work_dirs, + author=None, + chat_history={}, + stacktrace=stacktrace, + true_bug=True, + insight=insight,) + + analysis_result = AnalysisResult( + author=None, + run_result=run_result, + crash_result=crash_result, + chat_history={}) + + return analysis_result + + +if __name__ == '__main__': + + model = models.LLM.setup(ai_binary='', name='vertex_ai_gemini-2-5-pro-chat') + + args = parse_args() + + args.benchmark_yaml = './benchmark-sets/comparison/hoextdown.yaml' + + # Initialize the working directory + args.work_dirs = workdir.WorkDirs(args.work_dir) + + introspector.set_introspector_endpoints(args.introspector_endpoint) + + run_one_experiment.prepare(args.oss_fuzz_dir) + + # Initialize benchmarks + benchmarks: List[ + benchmarklib.Benchmark] = run_all_experiments.prepare_experiment_targets( + args) + + if len(benchmarks) == 0: + raise ValueError('No benchmarks found in the YAML file.') + + logger.info('Loaded %d benchmarks from the YAML file %s.', len(benchmarks), + args.benchmark_yaml, trial=1) + + benchmark = benchmarks[0] # For testing, we only analyze the first benchmark + + analyzer = context_analyzer.ContextAnalyzer(trial=1, + llm=model, + args=args, + benchmark=benchmark) + + last_result = get_mock_last_result(args, benchmark) + + # Run the context analyzer + try: + result = 
analyzer.execute([last_result]) + + # Write result to new file in work directory + result_file = os.path.join(args.work_dirs.base, + f'{benchmark.function_name}_context_analysis_result.json') + with open(result_file, 'w') as file: + json.dump(result.to_dict(), file, indent=2) + except Exception as e: + logger.error('Error during analysis for benchmark %s: %s\n%s', + benchmark.function_name, e, traceback.format_exc(), trial=1) + From 5667e7ed9787870be4eda42a8ddcf8aa1756c846 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Tue, 8 Jul 2025 22:53:57 +0000 Subject: [PATCH 10/12] Presubmit fixes --- agent_tests/context_analyzer_test.py | 76 ++++++++++++++-------------- 1 file changed, 39 insertions(+), 37 deletions(-) diff --git a/agent_tests/context_analyzer_test.py b/agent_tests/context_analyzer_test.py index fc33c92b7..6c31c40dc 100644 --- a/agent_tests/context_analyzer_test.py +++ b/agent_tests/context_analyzer_test.py @@ -15,21 +15,20 @@ import argparse import json -from data_prep import introspector -import logger import os +import traceback +from datetime import datetime from typing import List -from results import AnalysisResult, CrashResult, RunResult +import logger import run_all_experiments +import run_one_experiment from agent import context_analyzer +from data_prep import introspector from experiment import benchmark as benchmarklib from experiment import workdir from llm_toolkit import models -from datetime import datetime -import traceback - -import run_one_experiment +from results import AnalysisResult, CrashResult, RunResult RESULTS_DIR = f'./results-{datetime.now().strftime("%Y-%m-%d-%H-%M")}' @@ -70,7 +69,6 @@ def parse_args() -> argparse.Namespace: type=str, default=introspector.DEFAULT_INTROSPECTOR_ENDPOINT) - parser.add_argument( '-of', '--oss-fuzz-dir', @@ -83,7 +81,9 @@ def parse_args() -> argparse.Namespace: return parsed_args -def get_mock_last_result(args, benchmark: benchmarklib.Benchmark) -> AnalysisResult: + +def 
get_mock_last_result(args, + benchmark: benchmarklib.Benchmark) -> AnalysisResult: stacktrace = """ AddressSanitizer: SEGV on unknown address 0x000000000000 (pc 0x557d26695151 bp 0x7ffe468518b0 sp 0x7ffe46851860 T0) @@ -158,7 +158,6 @@ def get_mock_last_result(args, benchmark: benchmarklib.Benchmark) -> AnalysisRes The bug is in the project code, not the fuzzer driver. The fuzzer correctly identified a valid crash. """ - fuzz_target_source = """ #include #include @@ -249,31 +248,29 @@ def get_mock_last_result(args, benchmark: benchmarklib.Benchmark) -> AnalysisRes } """ - run_result = RunResult( + run_result = RunResult(benchmark=benchmark, + trial=1, + work_dirs=args.work_dirs, + author=None, + chat_history={}, + crashes=True, + fuzz_target_source=fuzz_target_source) + + crash_result = CrashResult( benchmark=benchmark, trial=1, work_dirs=args.work_dirs, author=None, chat_history={}, - crashes=True, - fuzz_target_source=fuzz_target_source + stacktrace=stacktrace, + true_bug=True, + insight=insight, ) - - crash_result = CrashResult(benchmark=benchmark, - trial=1, - work_dirs=args.work_dirs, - author=None, - chat_history={}, - stacktrace=stacktrace, - true_bug=True, - insight=insight,) - - analysis_result = AnalysisResult( - author=None, - run_result=run_result, - crash_result=crash_result, - chat_history={}) + analysis_result = AnalysisResult(author=None, + run_result=run_result, + crash_result=crash_result, + chat_history={}) return analysis_result @@ -301,15 +298,17 @@ def get_mock_last_result(args, benchmark: benchmarklib.Benchmark) -> AnalysisRes if len(benchmarks) == 0: raise ValueError('No benchmarks found in the YAML file.') - logger.info('Loaded %d benchmarks from the YAML file %s.', len(benchmarks), - args.benchmark_yaml, trial=1) + logger.info('Loaded %d benchmarks from the YAML file %s.', + len(benchmarks), + args.benchmark_yaml, + trial=1) benchmark = benchmarks[0] # For testing, we only analyze the first benchmark analyzer = 
context_analyzer.ContextAnalyzer(trial=1, - llm=model, - args=args, - benchmark=benchmark) + llm=model, + args=args, + benchmark=benchmark) last_result = get_mock_last_result(args, benchmark) @@ -318,11 +317,14 @@ def get_mock_last_result(args, benchmark: benchmarklib.Benchmark) -> AnalysisRes result = analyzer.execute([last_result]) # Write result to new file in work directory - result_file = os.path.join(args.work_dirs.base, - f'{benchmark.function_name}_context_analysis_result.json') + result_file = os.path.join( + args.work_dirs.base, + f'{benchmark.function_name}_context_analysis_result.json') with open(result_file, 'w') as file: json.dump(result.to_dict(), file, indent=2) except Exception as e: logger.error('Error during analysis for benchmark %s: %s\n%s', - benchmark.function_name, e, traceback.format_exc(), trial=1) - + benchmark.function_name, + e, + traceback.format_exc(), + trial=1) From 16475c28bd9bd0669f0e6b1d3049839c3a250038 Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Mon, 14 Jul 2025 19:54:48 +0000 Subject: [PATCH 11/12] Made the code for testing agents generalizable and reusable. --- agent_tests/agent_test.py | 191 ++++++ agent_tests/base_agent_test.py | 53 ++ agent_tests/context_analyzer_test.py | 327 +-------- .../prompt_files/context-analyzer-01.txt | 646 ++++++++++++++++++ 4 files changed, 919 insertions(+), 298 deletions(-) create mode 100644 agent_tests/agent_test.py create mode 100644 agent_tests/base_agent_test.py create mode 100644 agent_tests/prompt_files/context-analyzer-01.txt diff --git a/agent_tests/agent_test.py b/agent_tests/agent_test.py new file mode 100644 index 000000000..1fa8fb229 --- /dev/null +++ b/agent_tests/agent_test.py @@ -0,0 +1,191 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""A test for the function analyzer agent.""" + +import argparse +import json +import os +import traceback +from datetime import datetime +from typing import List, Tuple, Type + +import logger +import run_one_experiment +from agent import base_agent, context_analyzer +from agent_tests import base_agent_test, context_analyzer_test +from data_prep import introspector +from experiment import benchmark as benchmarklib +from experiment import workdir +from llm_toolkit import models +from results import AnalysisResult, CrashResult, Result, RunResult + +RESULTS_DIR = f'./results-{datetime.now().strftime("%Y-%m-%d-%H-%M")}' + +NUM_ANA = int(os.getenv('LLM_NUM_ANA', '2')) + +agents = { + 'ContextAnalyzer': (context_analyzer.ContextAnalyzer, context_analyzer_test.ContextAnalyzerAgentTest), +} + + +def parse_args() -> argparse.Namespace: + """Parses command line arguments.""" + parser = argparse.ArgumentParser( + description='Evaluate the function analyzer agent.') + + parser.add_argument('-y', + '--benchmark-yaml', + type=str, + required=True, + help='A benchmark YAML file.') + + parser.add_argument('-f', + '--function-name', + type=str, + required=True, + help='The function name to analyze.') + + parser.add_argument('-p', + '--pipeline', + type=str, + required=True, + help='Comma-separated list of agent names for testing.') + + parser.add_argument('-pf', + '--prompt-file', + type=str, + required=True, + default='', + help='A file containing the prompt to reconstruct for initial agent.') + + parser.add_argument('-mr', + '--max-round', + type=int, + default=100, + 
help='Max trial round for agents.') + + parser.add_argument('-e', + '--introspector-endpoint', + type=str, + default=introspector.DEFAULT_INTROSPECTOR_ENDPOINT) + + parser.add_argument( + '-of', + '--oss-fuzz-dir', + help='OSS-Fuzz dir path to use. Create temporary directory by default.', + default='') + + parser.add_argument('-w', '--work-dir', default=RESULTS_DIR) + + parsed_args = parser.parse_args() + + if not parsed_args.benchmark_yaml.endswith('.yaml') or not os.path.isfile(parsed_args.benchmark_yaml): + raise ValueError('Benchmark YAML file must be a valid .yaml file.') + + if not os.path.isfile(parsed_args.prompt_file): + raise ValueError('Prompt file must be a valid file.') + with open(parsed_args.prompt_file, 'r') as file: + prompt_content = file.read() + if not prompt_content.strip(): + raise ValueError('Prompt file cannot be empty.') + parsed_args.prompt = prompt_content.strip() + + return parsed_args + +def get_test_pipeline(agents_text: str) -> List[Tuple[Type[base_agent.BaseAgent], Type[base_agent_test.BaseAgentTest]]]: + """Returns a pipeline of agents for testing.""" + + agent_list = agents_text.strip().split(',') + pipeline = [] + for agent_name in agent_list: + if agent_name not in agents: + raise ValueError(f'Agent {agent_name} is not defined in the agents dictionary.') + pipeline.append(agents[agent_name]) + if not pipeline: + raise ValueError('No agents found in the pipeline. 
Please provide a valid agent list.') + return pipeline + +def get_result_list_for_agent( + agent_class: Tuple[Type[base_agent.BaseAgent], Type[base_agent_test.BaseAgentTest]], + benchmark: benchmarklib.Benchmark, + prompt: str) -> List[Result]: + """Returns the initial result list for the agent.""" + + agent_test_class = agent_class[1] + # Ensure agent_test_class is a subclass of BaseAgentTest + if not issubclass(agent_test_class, base_agent_test.BaseAgentTest): + raise TypeError(f"{agent_test_class.__name__} is not a subclass of BaseAgentTest") + + agent_test_instance = agent_test_class(args, trial=1) + return agent_test_instance.setup_initial_result_list(benchmark, prompt) + +def write_result(args: argparse.Namespace, trial: int, result: Result) -> None: + """Writes the result to a file in the work directory.""" + + result_file = os.path.join(args.work_dirs.base, f'{trial}_result.json') + with open(result_file, 'w') as file: + json.dump(result.to_dict(), file, indent=2) + + logger.info('Result written to %s', result_file, trial=trial) + +if __name__ == '__main__': + + model = models.LLM.setup(ai_binary='', name='vertex_ai_gemini-2-5-pro-chat') + + args = parse_args() + + introspector.set_introspector_endpoints(args.introspector_endpoint) + + run_one_experiment.prepare(args.oss_fuzz_dir) + + # Initialize test benchmark + benchmarks = benchmarklib.Benchmark.from_yaml(args.benchmark_yaml) + + test_benchmark = [benchmark for benchmark in benchmarks + if benchmark.function_name == args.function_name] + + if not test_benchmark: + raise ValueError(f'No benchmark found for function {args.function_name}.') + + benchmark = test_benchmark[0] + + # Initialize the working directory + args.work_dirs = workdir.WorkDirs(os.path.join(args.work_dir, f'output-{benchmark.id}')) + + pipeline = get_test_pipeline(args.pipeline) + + trial = 1 + + result_list = get_result_list_for_agent(pipeline[0], benchmark, args.prompt) + + result = None + + try: + + for agent_class in pipeline: + 
agent_instance = agent_class[0](trial, model, args, benchmark) + + # Execute the agent with the initial results + result = agent_instance.execute(result_list) + + # Prepare for the next agent in the pipeline + result_list.append(result) + + if result: + # Write the final result to a file + write_result(args, trial, result) + + except Exception as e: + logger.error('An error occurred during the agent execution: %s', str(e), trial=trial) + logger.error('Traceback: %s', traceback.format_exc(), trial=trial) diff --git a/agent_tests/base_agent_test.py b/agent_tests/base_agent_test.py new file mode 100644 index 000000000..0f74bb095 --- /dev/null +++ b/agent_tests/base_agent_test.py @@ -0,0 +1,53 @@ +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Base class for agent tests.""" +import re + +import logger + + +class BaseAgentTest: + """Base class for agent tests, providing common setup and utility methods.""" + + def __init__(self, args, trial): + self.args = args + self.trial = trial + + def _parse_tag(self, response: str, tag: str) -> str: + """Parses the XML-style tags from LLM response.""" + match = re.search(rf'<{tag}>(.*?)', response, re.DOTALL) + return match.group(1).strip() if match else '' + + def write_requirements_to_file(self, args, requirements: str) -> str: + """Write the requirements to a file.""" + if not requirements: + logger.warning('No requirements to write to file.', trial=self.trial) + return '' + + requirement_path = args.work_dirs.requirements_file_path(self.trial) + + with open(requirement_path, 'w') as f: + f.write(requirements) + + logger.info('Requirements written to %s', + requirement_path, + trial=self.trial) + + return requirement_path + + def setup_initial_result_list(self, benchmark, prompt): + """Sets up the initial result list for the agent test.""" + # Load the benchmark and prompt file + raise NotImplementedError( + "This method should be implemented in subclasses to set up the initial result list.") diff --git a/agent_tests/context_analyzer_test.py b/agent_tests/context_analyzer_test.py index 6c31c40dc..5df7b3bb9 100644 --- a/agent_tests/context_analyzer_test.py +++ b/agent_tests/context_analyzer_test.py @@ -11,320 +11,51 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-"""A test for the function analyzer agent.""" -import argparse -import json -import os -import traceback -from datetime import datetime -from typing import List - -import logger -import run_all_experiments -import run_one_experiment -from agent import context_analyzer -from data_prep import introspector -from experiment import benchmark as benchmarklib -from experiment import workdir -from llm_toolkit import models +from agent_tests.base_agent_test import BaseAgentTest from results import AnalysisResult, CrashResult, RunResult -RESULTS_DIR = f'./results-{datetime.now().strftime("%Y-%m-%d-%H-%M")}' - -NUM_ANA = int(os.getenv('LLM_NUM_ANA', '2')) - - -def parse_args() -> argparse.Namespace: - """Parses command line arguments.""" - parser = argparse.ArgumentParser( - description='Evaluate the function analyzer agent.') - - parser.add_argument('-y', - '--benchmark-yaml', - type=str, - help='A benchmark YAML file.') - - parser.add_argument('-b', - '--benchmarks-directory', - type=str, - help='A directory containing benchmark YAML files.') - - parser.add_argument( - '-g', - '--generate-benchmarks', - help=('Generate benchmarks and use those for analysis. This is a string ' - 'of comma-separated heuristics to use when identifying benchmark ' - 'targets.'), - type=str) - - parser.add_argument('-mr', - '--max-round', - type=int, - default=100, - help='Max trial round for agents.') - - parser.add_argument('-e', - '--introspector-endpoint', - type=str, - default=introspector.DEFAULT_INTROSPECTOR_ENDPOINT) - - parser.add_argument( - '-of', - '--oss-fuzz-dir', - help='OSS-Fuzz dir path to use. 
Create temporary directory by default.', - default='') - - parser.add_argument('-w', '--work-dir', default=RESULTS_DIR) - - parsed_args = parser.parse_args() - - return parsed_args - - -def get_mock_last_result(args, - benchmark: benchmarklib.Benchmark) -> AnalysisResult: - - stacktrace = """ -AddressSanitizer: SEGV on unknown address 0x000000000000 (pc 0x557d26695151 bp 0x7ffe468518b0 sp 0x7ffe46851860 T0) -==17==The signal is caused by a READ memory access. -==17==Hint: address points to the zero page. -SCARINESS: 10 (null-deref) -#0 0x557d26695151 in toc_header /src/hoextdown/src/html.c:987 -#1 0x557d266770a0 in parse_atxheader /src/hoextdown/src/document.c:2740:3 -#2 0x557d266770a0 in parse_block /src/hoextdown/src/document.c:3558:11 -#3 0x557d26675c36 in hoedown_document_render /src/hoextdown/src/document.c:4162:3 -#4 0x557d2666e8ef in LLVMFuzzerTestOneInput /src/hoextdown/test/hoedown_fuzzer.c:78:3 -#5 0x557d26523300 in fuzzer::Fuzzer::ExecuteCallback(unsigned char const*, unsigned long) /src/llvm-project/compiler-rt/lib/fuzzer/FuzzerLoop.cpp:614:13 -#6 0x557d26522b25 in fuzzer::Fuzzer::RunOne(unsigned char const*, unsigned long, bool, fuzzer::InputInfo*, bool, bool*) /src/llvm-project/compiler-rt/lib/fuzzer/FuzzerLoop.cpp:516:7 -#7 0x557d26524305 in fuzzer::Fuzzer::MutateAndTestOne() /src/llvm-project/compiler-rt/lib/fuzzer/FuzzerLoop.cpp:760:19 -#8 0x557d26525095 in fuzzer::Fuzzer::Loop(std::__Fuzzer::vector>&) /src/llvm-project/compiler-rt/lib/fuzzer/FuzzerLoop.cpp:905:5 -#9 0x557d26513edb in fuzzer::FuzzerDriver(int*, char***, int (*)(unsigned char const*, unsigned long)) /src/llvm-project/compiler-rt/lib/fuzzer/FuzzerDriver.cpp:914:6 -#10 0x557d2653f2b2 in main /src/llvm-project/compiler-rt/lib/fuzzer/FuzzerMain.cpp:20:10 -#11 0x7f964403b082 in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x24082) (BuildId: 5792732f783158c66fb4f3756458ca24e46e827d) -#12 0x557d2650675d in _start (out/libfuzzer-address-x86_64/hoedown_fuzzer+0x5575d) - -DEDUP_TOKEN: 
toc_header--parse_atxheader--parse_block -AddressSanitizer can not provide additional info. - """ - - insight = """ -The crash is caused by a null pointer dereference in the `toc_header` function in `/src/hoextdown/src/html.c`. - -Specifically, at line 987: -`rndr_header_id(ob, content->data, content->size, 1, data);` - -The `content` pointer is dereferenced without a prior null check. A null check for `content` exists at line 991, but this is after the pointer has already been dereferenced at line 987, which is the source of the bug. - -The fix is to move the null check for `content` to before the dereference. The `if (content)` block starting at line 991 should be moved to enclose the code that uses `content`, including the call to `rndr_header_id` at line 987. - -```c -// src/html.c:985 -} else { -if (content) { // Add this check -hoedown_buffer_puts(ob, "data, content->size, 1, data); -hoedown_buffer_puts(ob, "\">"); -} -} - -if (content) { -hoedown_buffer_put(ob, content->data, content->size); -} -HOEDOWN_BUFPUTSL(ob, "\n"); -``` - -The corrected logic should look something like this: - -```c -// Potential patch -} else { -if (content) { -hoedown_buffer_puts(ob, "data, content->size, 1, data); -hoedown_buffer_puts(ob, "\">"); -} -} - -if (content) { -hoedown_buffer_put(ob, content->data, content->size); -} -if (content) { // This check should wrap the closing tag as well -HOEDOWN_BUFPUTSL(ob, "\n"); -} -``` -A more robust fix would be to ensure that if `content` is NULL, the `` tag is not opened at all. - -The bug is in the project code, not the fuzzer driver. The fuzzer correctly identified a valid crash. 
- """ - - fuzz_target_source = """ -#include -#include -#include -#include -#include -#include -#include "context_test.h" -#include "/src/hoextdown/src/document.h" -#include "html.h" +class ContextAnalyzerAgentTest(BaseAgentTest): + """Test for the ContextAnalyzer agent.""" -#define DEF_OUNIT 64 -#define DEF_MAX_NESTING 16 + def setup_initial_result_list(self, benchmark, prompt): + """Sets up the initial result list for the ContextAnalyzer agent test.""" -int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { -// Need at least 8 bytes for config: -// 4: extensions, 1: attr_activation, 2: html_flags, 1: nesting_level/renderer_choice -if (size < 8) { -return 0; -} + # Get necessary data from prompt + fuzz_target_source = self._parse_tag(prompt, 'fuzz-target') + function_requirement = self._parse_tag(prompt, 'function-requirements') + stacktrace = self._parse_tag(prompt, 'crash-stacktrace') + insight = self._parse_tag(prompt, 'crash-analysis') -/* Use the first bytes of input to control flags and options */ -hoedown_extensions extensions = *(const uint32_t*)data; -data += 4; -size -= 4; + if function_requirement: + # Save function requirements to file + self.write_requirements_to_file(self.args, function_requirement) -uint8_t attr_activation = data[0]; -data += 1; -size -= 1; - -// Use 2 bytes for HTML flags to cover all enum values. -hoedown_html_flags html_flags = *(const uint16_t*)data; -data += 2; -size -= 2; - -// Use 1 byte to control nesting level and renderer choice. 
-uint8_t fuzzer_choice = data[0]; -data += 1; -size -= 1; - -int nesting_level = fuzzer_choice % 16; // Limit nesting level -int renderer_type = fuzzer_choice / 16; // Use upper bits to choose renderer - -hoedown_renderer *renderer = NULL; -void (*renderer_free)(hoedown_renderer *); - -/* Let the fuzzer choose between the standard and TOC renderer */ -if (renderer_type % 2 == 0) { -renderer = hoedown_html_renderer_new(html_flags, nesting_level); -} else { -renderer = hoedown_html_toc_renderer_new(nesting_level); -} -renderer_free = hoedown_html_renderer_free; - -if (!renderer) { -return 0; -} - -/* Perform Markdown rendering */ -hoedown_buffer *ob = hoedown_buffer_new(DEF_OUNIT); -hoedown_buffer *meta = hoedown_buffer_new(DEF_OUNIT); -hoedown_document *document = hoedown_document_new( -renderer, extensions, DEF_MAX_NESTING, attr_activation, NULL, meta); - -if (!document) { -renderer_free(renderer); -hoedown_buffer_free(ob); -hoedown_buffer_free(meta); -return 0; -} - -/* -* Call hoedown_document_render instead of hoedown_document_render_inline. -* This function processes the input as a full Markdown document, including -* block-level elements like tables, lists, headers, and code blocks, which -* will significantly increase coverage. 
-*/ -hoedown_document_render(document, ob, data, size); - -/* Cleanup */ -hoedown_document_free(document); -renderer_free(renderer); -hoedown_buffer_free(ob); -hoedown_buffer_free(meta); - -return 0; -} - """ - - run_result = RunResult(benchmark=benchmark, - trial=1, - work_dirs=args.work_dirs, + run_result = RunResult(benchmark=benchmark, + trial=self.trial, + work_dirs=self.args.work_dirs, author=None, chat_history={}, crashes=True, fuzz_target_source=fuzz_target_source) - crash_result = CrashResult( - benchmark=benchmark, - trial=1, - work_dirs=args.work_dirs, - author=None, - chat_history={}, - stacktrace=stacktrace, - true_bug=True, - insight=insight, - ) - - analysis_result = AnalysisResult(author=None, + crash_result = CrashResult( + benchmark=benchmark, + trial=self.trial, + work_dirs=self.args.work_dirs, + author=None, + chat_history={}, + stacktrace=stacktrace, + true_bug=True, + insight=insight, + ) + + analysis_result = AnalysisResult(author=None, run_result=run_result, crash_result=crash_result, chat_history={}) - return analysis_result - - -if __name__ == '__main__': - - model = models.LLM.setup(ai_binary='', name='vertex_ai_gemini-2-5-pro-chat') - - args = parse_args() - - args.benchmark_yaml = './benchmark-sets/comparison/hoextdown.yaml' - - # Initialize the working directory - args.work_dirs = workdir.WorkDirs(args.work_dir) - - introspector.set_introspector_endpoints(args.introspector_endpoint) - - run_one_experiment.prepare(args.oss_fuzz_dir) - - # Initialize benchmarks - benchmarks: List[ - benchmarklib.Benchmark] = run_all_experiments.prepare_experiment_targets( - args) - - if len(benchmarks) == 0: - raise ValueError('No benchmarks found in the YAML file.') - - logger.info('Loaded %d benchmarks from the YAML file %s.', - len(benchmarks), - args.benchmark_yaml, - trial=1) - - benchmark = benchmarks[0] # For testing, we only analyze the first benchmark - - analyzer = context_analyzer.ContextAnalyzer(trial=1, - llm=model, - args=args, - 
benchmark=benchmark) - - last_result = get_mock_last_result(args, benchmark) + return [run_result, analysis_result] - # Run the context analyzer - try: - result = analyzer.execute([last_result]) - # Write result to new file in work directory - result_file = os.path.join( - args.work_dirs.base, - f'{benchmark.function_name}_context_analysis_result.json') - with open(result_file, 'w') as file: - json.dump(result.to_dict(), file, indent=2) - except Exception as e: - logger.error('Error during analysis for benchmark %s: %s\n%s', - benchmark.function_name, - e, - traceback.format_exc(), - trial=1) diff --git a/agent_tests/prompt_files/context-analyzer-01.txt b/agent_tests/prompt_files/context-analyzer-01.txt new file mode 100644 index 000000000..8b01b92cb --- /dev/null +++ b/agent_tests/prompt_files/context-analyzer-01.txt @@ -0,0 +1,646 @@ + +You are a professional security engineer developing fuzz targets for the project astc-encoder. +The previous fuzz target you developed caused a crash in the project. +However, this crash is NOT feasible and cannot be triggered from the project's external entry points (public functions with no non-test callers). +We have provided you with the fuzz target that caused the crash, its requirements, the crash stacktrace, an analysis of the crash, +and a set of tools that you can use to navigate the project's codebase. +Your goal is to deeply analyze the project and the context of the crashing function and provide detailed source code evidence that the crash is NOT feasible from any entry point in the program. +If the crash is indeed not feasible, also provide a recommendation for fixing the fuzz target. +If the crash is feasible, provide source code evidence that shows the sequence of execution from the project's entry point that can cause the crash. + + + +Follow these steps to determine the feasibility of this crash. + +Step 1: Identify the information you need to analyze the crash and the function's context. 
+This includes:
+- The crashing function called by the fuzz target
+- The different functions where it is called from
+- The entry points of the project
+- The crash stacktrace and analysis
+- The state of input and global variables that lead to the crash
+- Locations in the code where those input variables could have been initialized
+- Execution paths between the input variable initializations and the crashing function
+
+Step 2: Collect information using the provided tools
+Use the provided tools (see the tools section) and follow their rules to gather the necessary information. You can collect information from:
+- The project source code directory `/src/astc-encoder/Source/` cloned from the project repository.
+- External API using the get_function_implementation tool.
+- Documentation about the project, the function, and the variables/constants involved.
+
+Step 3: Identify the specific input conditions that lead to the crash.
+- This is the combination of input variable values and global variable values that directly caused the crash.
+- In your analysis, you must show how this combination of values was necessary for the crash to occur.
+
+Step 4: Analyze the project and provide proof, with source code evidence, that the crash cannot be triggered from the project's entry points (public non-test functions).
+- To obtain proof, identify all the path constraints from the entry points to the target function and show how the input condition violated these constraints.
+- If the crash is feasible, you should show a step-by-step execution path from the entry point and the corresponding path constraints that enable the crash.
+- DO NOT SPECULATE. Provide source code evidence to back up any statement you make about the crash's feasibility.
+
+Step 5: If the crash is not feasible, recommend necessary and minimal modifications to the fuzz target to prevent the crash.
+- Your recommendations should show what constraints should be added to the fuzz target to prevent the crash.
+- The recommended modifications should mirror how the crashing function is called in different project locations.
+
+Step 6: MAKE SURE YOU PROVIDE YOUR RESPONSE USING THE report_final_result TOOL.
+- The tool takes three arguments - feasible, analysis and recommendations.
+- feasible should be True if the crash is feasible, or False otherwise.
+- analysis should contain source code evidence of the crash's feasibility.
+- recommendations should only be provided for non-feasible crashes, and should recommend fuzz target modifications to prevent this crash.
+
+
+Here is the provided data.
+
+
+// SPDX-License-Identifier: Apache-2.0
+// ----------------------------------------------------------------------------
+// Copyright 2020-2021 Arm Limited
+//
+// Licensed under the Apache License, Version 2.0 (the "License"); you may not
+// use this file except in compliance with the License. You may obtain a copy
+// of the License at:
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+// License for the specific language governing permissions and limitations
+// under the License.
+// ----------------------------------------------------------------------------
+
+/**
+* @brief Fuzz target for symbolic_to_physical().
+*
+* This function is the final entrypoint for compressing a symbolic block of
+* ASTC data into a 16 byte physical block.
+*/
+
+#include "/src/astc-encoder/Source/astcenc_internal.h"
+
+#include
+#include
+#include
+
+// Re-use the BSD initialization logic from the other fuzzer
+struct BlockSizes
+{
+int x;
+int y;
+int z;
+};
+
+// Use a set of known-valid block sizes to avoid corner cases where a
+// block size has no valid modes, which can cause downstream failures.
+std::array testSz {{ +{ 5, 5, 1}, // Common 2D block size +{ 8, 8, 1}, // Common 2D block size +{ 4, 4, 4} // Common 3D block size +}}; + +std::array testBSD; + +bool bsd_initializer() +{ +for (size_t i = 0; i < testSz.size(); i++) +{ +init_block_size_descriptor( +testSz[i].x, +testSz[i].y, +testSz[i].z, +false, +4, +1.0f, +testBSD[i]); +} +return true; +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) +{ +// Preinitialize the block size descriptors we need +static bool init = bsd_initializer(); + +// Need some data to work with +if (size < 32) +{ +return 0; +} + +FuzzedDataProvider stream(data, size); + +// Select a block size descriptor to test with +int bsd_idx = stream.ConsumeIntegralInRange(0, testSz.size() - 1); +const block_size_descriptor& bsd = testBSD[bsd_idx]; + +// Create and populate the symbolic block +symbolic_compressed_block scb{}; + +// Requirement: scb.block_type != SYM_BTYPE_ERROR +scb.block_type = stream.PickValueInArray({ +SYM_BTYPE_CONST_F16, +SYM_BTYPE_CONST_U16, +SYM_BTYPE_NONCONST +}); + +if (scb.block_type == SYM_BTYPE_NONCONST) +{ +// Requirement: scb.partition_count is 1..4 +scb.partition_count = stream.ConsumeIntegralInRange(1, 4); + +// Requirement: scb.block_mode is a valid index +if (bsd.block_mode_count_all == 0) +{ +return 0; +} + +// Create a list of valid raw block mode indices for the current bsd +std::vector valid_modes; +for (uint16_t i = 0; i < WEIGHTS_MAX_BLOCK_MODES; i++) +{ +if (bsd.block_mode_packed_index[i] != BLOCK_BAD_BLOCK_MODE) +{ +valid_modes.push_back(i); +} +} + +// This should not be empty if block_mode_count_all > 0 +if (valid_modes.empty()) +{ +return 0; +} + +// Pick a valid raw mode from the list +size_t mode_idx = stream.ConsumeIntegralInRange(0, valid_modes.size() - 1); +scb.block_mode = valid_modes[mode_idx]; + +// Requirement: scb.quant_mode is QUANT_6..QUANT_32 +scb.quant_mode = static_cast(stream.ConsumeIntegralInRange(QUANT_6, QUANT_32)); + +scb.color_formats_matched = 
stream.ConsumeBool(); +scb.plane2_component = stream.ConsumeIntegralInRange(-1, 3); +scb.partition_index = stream.ConsumeIntegral(); + +for (unsigned int i = 0; i < scb.partition_count; i++) +{ +// The upper bits of color_formats define the endpoint class (0-3). +// The lower 2 bits are other flags. Constrain the class to be valid. +uint8_t color_class = stream.ConsumeIntegralInRange(0, 3); +uint8_t other_flags = stream.ConsumeIntegralInRange(0, 3); +scb.color_formats[i] = (color_class << 2) | other_flags; +for (int j = 0; j < 8; j++) +{ +scb.color_values[i][j] = stream.ConsumeIntegral(); +} +} +} +else // SYM_BTYPE_CONST_F16 or SYM_BTYPE_CONST_U16 +{ +// For const blocks, only initialize the constant color. +// Other fields are not used by symbolic_to_physical for constant blocks. +for (int i = 0; i < BLOCK_MAX_COMPONENTS; i++) +{ +scb.constant_color[i] = stream.ConsumeIntegral(); +} +} + +// Populate weights array with any remaining data +std::vector weights = stream.ConsumeRemainingBytes(); +if (!weights.empty()) +{ +std::memcpy(scb.weights, weights.data(), std::min(sizeof(scb.weights), weights.size())); +} + + +// Requirement: pcb must be a writable buffer of at least 16 bytes. +uint8_t pcb[16]; + +// Call the function under test +symbolic_to_physical(bsd, scb, pcb); + +return 0; +} + + + + +project name: astc-encoder +function signature: void symbolic_to_physical(const struct block_size_descriptor &, const struct symbolic_compressed_block &, uint8_t *) + + + +The symbolic_to_physical function converts a symbolic representation of a compressed block (`symbolic_compressed_block`) into a physical hardware-readable format (`physical_compressed_block`, which is a 16-byte array). It handles different block types, including constant color blocks and regular blocks with partitions, weights, and color endpoint data. The function encodes this symbolic information into a 128-bit physical block according to the ASTC specification. 
+ + + + +The `scb.block_type` member must not be equal to `SYM_BTYPE_ERROR`. The function asserts this condition at the beginning, and a failure will lead to a program crash. + + +The `pcb` argument must be a writable buffer of at least 16 bytes. The function writes up to 16 bytes into this buffer, and a smaller buffer will result in an out-of-bounds write. + + +The `bsd` argument should be initialized by calling `astcenc_context_alloc` and obtaining the `bsd` member from the resulting context. This ensures that the block size descriptor contains valid data for the subsequent encoding operations. + + +The `scb.partition_count` member must be between 1 and 4, inclusive, if the block type is not constant. The function uses this value to access arrays and control loops, and an invalid value can lead to out-of-bounds access. + + +The `scb.block_mode` member must be a valid index into the `block_modes` array within the `block_size_descriptor`. The function uses this to fetch block mode details, and an invalid index will cause an out-of-bounds read. + + +The `scb.quant_mode` member must be a value between `QUANT_6` and `QUANT_32`. This value is used as an index into the `color_uquant_to_scrambled_pquant_tables` array, so an invalid value will cause an out-of-bounds read. 
+ + + + + +AddressSanitizer: stack-buffer-overflow on address 0x7f35e2f8ed60 at pc 0x560a26c8da07 bp 0x7fffe123ba80 sp 0x7fffe123ba78 +READ of size 1 at 0x7f35e2f8ed60 thread T0 +SCARINESS: 27 (1-byte-read-stack-buffer-overflow) +#0 0x560a26c8da06 in write_bits(unsigned int, unsigned int, unsigned int, unsigned char*) /src/astc-encoder/Source/./astcenc_integer_sequence.cpp:462:9 +#1 0x560a26c8d82e in encode_ise(quant_method, unsigned int, unsigned char const*, unsigned char*, unsigned int) /src/astc-encoder/Source/./astcenc_integer_sequence.cpp:644:4 +#2 0x560a26c951b7 in symbolic_to_physical(block_size_descriptor const&, symbolic_compressed_block const&, unsigned char*) /src/astc-encoder/Source/./astcenc_symbolic_physical.cpp:284:2 +#3 0x560a26c7e9e5 in LLVMFuzzerTestOneInput /src/astc-encoder/Source/./Fuzzers/fuzz_astc_physical_to_symbolic.cpp:165:2 +#4 0x560a26b32580 in fuzzer::Fuzzer::ExecuteCallback(unsigned char const*, unsigned long) /src/llvm-project/compiler-rt/lib/fuzzer/FuzzerLoop.cpp:614:13 +#5 0x560a26b31da5 in fuzzer::Fuzzer::RunOne(unsigned char const*, unsigned long, bool, fuzzer::InputInfo*, bool, bool*) /src/llvm-project/compiler-rt/lib/fuzzer/FuzzerLoop.cpp:516:7 +#6 0x560a26b33585 in fuzzer::Fuzzer::MutateAndTestOne() /src/llvm-project/compiler-rt/lib/fuzzer/FuzzerLoop.cpp:760:19 +#7 0x560a26b34315 in fuzzer::Fuzzer::Loop(std::__Fuzzer::vector>&) /src/llvm-project/compiler-rt/lib/fuzzer/FuzzerLoop.cpp:905:5 +#8 0x560a26b2315b in fuzzer::FuzzerDriver(int*, char***, int (*)(unsigned char const*, unsigned long)) /src/llvm-project/compiler-rt/lib/fuzzer/FuzzerDriver.cpp:914:6 +#9 0x560a26b4e532 in main /src/llvm-project/compiler-rt/lib/fuzzer/FuzzerMain.cpp:20:10 +#10 0x7f35e3710082 in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x24082) (BuildId: 5792732f783158c66fb4f3756458ca24e46e827d) +#11 0x560a26b159dd in _start (out/libfuzzer-address-x86_64/fuzz_astc_physical_to_symbolic+0x4b9dd) + +DEDUP_TOKEN: write_bits(unsigned int, unsigned int, 
unsigned int, unsigned char*)--encode_ise(quant_method, unsigned int, unsigned char const*, unsigned char*, unsigned int)--symbolic_to_physical(block_size_descriptor const&, symbolic_compressed_block const&, unsigned char*) +Address 0x7f35e2f8ed60 is located in stack of thread T0 at offset 352 in frame +#0 0x560a26c7db3f in LLVMFuzzerTestOneInput /src/astc-encoder/Source/./Fuzzers/fuzz_astc_physical_to_symbolic.cpp:66 + +DEDUP_TOKEN: LLVMFuzzerTestOneInput +This frame has 5 object(s): +[32, 148) 'scb' (line 83) +[192, 195) 'ref.tmp' (line 86) +[208, 232) 'valid_modes' (line 104) +[272, 296) 'weights' (line 154) +[336, 352) 'pcb' (line 162) <== Memory access at offset 352 overflows this variable +HINT: this may be a false positive if your program uses some custom stack unwind mechanism, swapcontext or vfork +(longjmp and C++ exceptions *are* supported) + + + + +**Analysis:** + +The crash is a stack-buffer-overflow read of 1 byte, occurring in the `write_bits` function. The overflow happens on the `pcb` buffer, which is allocated on the stack in `LLVMFuzzerTestOneInput` with a size of 16 bytes, the correct size for a physical ASTC block. + +The root cause is in the implementation of the `write_bits` function in `astcenc_integer_sequence.cpp`. This function is designed to perform a fast, potentially unaligned, 16-bit write operation. It does this by reading and writing to `ptr[0]` and `ptr[1]` separately to avoid strict-aliasing issues. + +The bug is triggered when `write_bits` is called with parameters that cause it to write to the very last byte of the 16-byte destination buffer. Specifically, if the `bitoffset` parameter is in the range [120, 127], the calculated byte offset (`bitoffset >> 3`) becomes 15. The function then advances its pointer to `pcb + 15`. While the subsequent access to `ptr[0]` (i.e., `pcb[15]`) is valid, the access to `ptr[1]` attempts to read from `pcb[16]`, which is one byte beyond the buffer's allocated memory, causing the overflow. 
+ +The fuzzer is operating correctly. It supplies a valid 16-byte buffer and explores input parameters that lead `encode_ise` to call `write_bits` with an edge-case `bitoffset` value, successfully exposing this latent bug in the project's code. + +**Suggestion:** + +The `write_bits` function should be patched to prevent it from reading past the end of the destination buffer. A simple guard can be added to ensure the access to the second byte (`ptr[1]`) only occurs when the write operation is not on the last byte of the buffer. + +A suggested patch for `astcenc_integer_sequence.cpp` around line 460 would be: + +```cpp +// Original code: +ptr[0] &= mask; +ptr[0] |= value; +ptr[1] &= mask >> 8; +ptr[1] |= value >> 8; + +// Patched code: +unsigned int byte_offset = bitoffset >> 3; +// ... (code to advance ptr) + +ptr[0] &= (mask & 0xFF); +ptr[0] |= (value & 0xFF); + +// Only touch the second byte if we are not on the last byte of the 16-byte block +if (byte_offset < 15) +{ +ptr[1] &= ((mask >> 8) & 0xFF); +ptr[1] |= ((value >> 8) & 0xFF); +} +``` +This change ensures that when writing to the final byte of the buffer, the function does not attempt to read or write the non-existent subsequent byte. + + +You will be provided with the following tools. +1. get_function_implementation: This is a tool you can use to retrieve the implementation of a function using the project's name and function's name as arguments. +2. search_project_files: This is an interactive tool you can use to search the project's source file using bash commands and find definitions or usages of functions, classes, structs, and variables. +The usage guide for the Bash Tool is provided below. +3. report_final_result: This is a tool you must use to report your final result. Once this tool is called, no further instructions will be provided to you. +**Bash tool Guide** +Use the bash tool to investigate files in the fuzz target's build environment. 
This will help you understand the project source code, the function under test, its dependencies, and any compilation requirements. + + +1. STRICTLY Only One Bash Command per message: +* **DO NOT** send multiple bash commands in each message. +2. Execute Bash Command Message Structure: +* Reason for the Command: +* Explain the reason for running the command. +* Wrap this explanation within and tags. +* Bash Command: +* Provide the bash command to execute. +* Wrap the command with and tags. +* Format Example: + +I want to locate the source file containing the definition of the function-under-test to examine its implementation. + + +grep -rn 'function_name(' /src/project-name/ + +3. Receiving Bash Command Output Message Structure: +* Bash execution outputs will be returned in the following format: + +[The command you executed.] + + +[Standard output of the command.] + + +[Standard error of the command.] + + + + +1 .File Access and Modification Restrictions: +* Allowed Actions: +* View any files and environment variables in the build environment. +* Prohibited Actions: +* Do not modify, rename, or create new files. +* All modifications will not be preserved when building the fuzz target. + + + +1 .Purposeful Commands: +* Each bash command should have a clear purpose related to your investigation toward the final goals. +2. Careful Interpretation: +* Analyze the output of each command thoroughly to inform your next steps. +* Keep notes of important findings that will help in modifying the fuzz target and build script. +4. Clarity and Compliance: +* Adhere strictly to the interaction protocols and formatting requirements. +* Ensure your messages are clear and properly formatted. +5. No Unauthorized Actions: +* Do not modify files. +6. Avoid using `pkg-config`: +* Use bash commands to manually identify the correct file paths +* Explore the project's directory hierarchy (`/src/`) to learn headerfiles locations, library's naming conventions, and build system. 
+ + + +Command 1. Start by locating the function's definition and understand its parameters, e.g.: + +To find the definition of `my_function` in the project directory and understand its implementation details. + + +grep -rn 'my_function(' /src/project/ + +Command 2. Identify Required Headers: + +To identify the header files in the project directory that declare `my_function`. + + +grep -rn 'my_function' /src/project/ --include=*.h + +Command 3. Locate Custom Type Definitions: + +To find the definition of the custom type `CustomType` used by `my_function`. + + +grep -rn 'typedef.*CustomType' /src/project/ + +Command 4. Examine Existing Fuzz Targets: + +To see how existing fuzz targets include headers and initialize variables in the `LLVMFuzzerTestOneInput` function. + + +cat /src/astc-encoder/Source/Fuzzers/fuzz_astc_physical_to_symbolic.cpp + +* Remember you can use the same command on other example fuzz targets under the same parent directory as `/src/astc-encoder/Source/Fuzzers/fuzz_astc_physical_to_symbolic.cpp`. +Command 5. Check Build Script for Compilation Flags and Libraries: + +To check which compiler flags and libraries are used in the build script. + + +cat /src/build.bk.sh + +Command 6. Verify Available Libraries: + +To list the built libraries to verify that the necessary libraries are available. + + +ls /src/project/build/libs/ + +Command 7. Understand Environment Variables: + +To check if any environment variables related to the project are set. + + +printenv | grep 'PROJECT_VARIABLE' + + + + +1. Do Not Compile or Run Code: +* Your investigation is limited to reading and interpreting information using bash commands. + + +WARNING:google_genai.types:Warning: there are non-text parts in the response: ['function_call'], returning concatenated text result from text parts. Check the full candidates.content.parts accessor to get the full model response. 
+2025-07-10 19:16:17 [Trial ID: 02] INFO [base_agent.log_llm_response:424]: +Requesting implementation for the function: +Function name: symbolic_to_physical +Project name: astc-encoder +2025-07-10 19:16:17 [Trial ID: 02] INFO [context_analyzer.get_function_implementation:200]: Project functions not initialized. Initializing for project "astc-encoder". +2025-07-10 19:16:18 [Trial ID: 02] INFO [base_agent.log_llm_prompt:417]: +Project name: astc-encoder +Function name: symbolic_to_physical + +Function source code: +void symbolic_to_physical( +const block_size_descriptor& bsd, +const symbolic_compressed_block& scb, +uint8_t pcb[16] +) { +assert(scb.block_type != SYM_BTYPE_ERROR); + +// Constant color block using UNORM16 colors +if (scb.block_type == SYM_BTYPE_CONST_U16) +{ +// There is currently no attempt to coalesce larger void-extents +static const uint8_t cbytes[8] { 0xFC, 0xFD, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; +for (unsigned int i = 0; i < 8; i++) +{ +pcb[i] = cbytes[i]; +} + +for (unsigned int i = 0; i < BLOCK_MAX_COMPONENTS; i++) +{ +pcb[2 * i + 8] = scb.constant_color[i] & 0xFF; +pcb[2 * i + 9] = (scb.constant_color[i] >> 8) & 0xFF; +} + +return; +} + +// Constant color block using FP16 colors +if (scb.block_type == SYM_BTYPE_CONST_F16) +{ +// There is currently no attempt to coalesce larger void-extents +static const uint8_t cbytes[8] { 0xFC, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF }; +for (unsigned int i = 0; i < 8; i++) +{ +pcb[i] = cbytes[i]; +} + +for (unsigned int i = 0; i < BLOCK_MAX_COMPONENTS; i++) +{ +pcb[2 * i + 8] = scb.constant_color[i] & 0xFF; +pcb[2 * i + 9] = (scb.constant_color[i] >> 8) & 0xFF; +} + +return; +} + +unsigned int partition_count = scb.partition_count; + +// Compress the weights. 
+// They are encoded as an ordinary integer-sequence, then bit-reversed +uint8_t weightbuf[16] { 0 }; + +const auto& bm = bsd.get_block_mode(scb.block_mode); +const auto& di = bsd.get_decimation_info(bm.decimation_mode); +int weight_count = di.weight_count; +quant_method weight_quant_method = bm.get_weight_quant_mode(); +float weight_quant_levels = static_cast(get_quant_level(weight_quant_method)); +int is_dual_plane = bm.is_dual_plane; + +const auto& qat = quant_and_xfer_tables[weight_quant_method]; + +int real_weight_count = is_dual_plane ? 2 * weight_count : weight_count; + +int bits_for_weights = get_ise_sequence_bitcount(real_weight_count, weight_quant_method); + +uint8_t weights[64]; +if (is_dual_plane) +{ +for (int i = 0; i < weight_count; i++) +{ +float uqw = static_cast(scb.weights[i]); +float qw = (uqw / 64.0f) * (weight_quant_levels - 1.0f); +int qwi = static_cast(qw + 0.5f); +weights[2 * i] = qat.scramble_map[qwi]; + +uqw = static_cast(scb.weights[i + WEIGHTS_PLANE2_OFFSET]); +qw = (uqw / 64.0f) * (weight_quant_levels - 1.0f); +qwi = static_cast(qw + 0.5f); +weights[2 * i + 1] = qat.scramble_map[qwi]; +} +} +else +{ +for (int i = 0; i < weight_count; i++) +{ +float uqw = static_cast(scb.weights[i]); +float qw = (uqw / 64.0f) * (weight_quant_levels - 1.0f); +int qwi = static_cast(qw + 0.5f); +weights[i] = qat.scramble_map[qwi]; +} +} + +encode_ise(weight_quant_method, real_weight_count, weights, weightbuf, 0); + +for (int i = 0; i < 16; i++) +{ +pcb[i] = static_cast(bitrev8(weightbuf[15 - i])); +} + +write_bits(scb.block_mode, 11, 0, pcb); +write_bits(partition_count - 1, 2, 11, pcb); + +int below_weights_pos = 128 - bits_for_weights; + +// Encode partition index and color endpoint types for blocks with 2+ partitions +if (partition_count > 1) +{ +write_bits(scb.partition_index, 6, 13, pcb); +write_bits(scb.partition_index >> 6, PARTITION_INDEX_BITS - 6, 19, pcb); + +if (scb.color_formats_matched) +{ +write_bits(scb.color_formats[0] << 2, 6, 13 + 
PARTITION_INDEX_BITS, pcb); +} +else +{ +// Check endpoint types for each partition to determine the lowest class present +int low_class = 4; + +for (unsigned int i = 0; i < partition_count; i++) +{ +int class_of_format = scb.color_formats[i] >> 2; +low_class = astc::min(class_of_format, low_class); +} + +if (low_class == 3) +{ +low_class = 2; +} + +int encoded_type = low_class + 1; +int bitpos = 2; + +for (unsigned int i = 0; i < partition_count; i++) +{ +int classbit_of_format = (scb.color_formats[i] >> 2) - low_class; +encoded_type |= classbit_of_format << bitpos; +bitpos++; +} + +for (unsigned int i = 0; i < partition_count; i++) +{ +int lowbits_of_format = scb.color_formats[i] & 3; +encoded_type |= lowbits_of_format << bitpos; +bitpos += 2; +} + +int encoded_type_lowpart = encoded_type & 0x3F; +int encoded_type_highpart = encoded_type >> 6; +int encoded_type_highpart_size = (3 * partition_count) - 4; +int encoded_type_highpart_pos = 128 - bits_for_weights - encoded_type_highpart_size; +write_bits(encoded_type_lowpart, 6, 13 + PARTITION_INDEX_BITS, pcb); +write_bits(encoded_type_highpart, encoded_type_highpart_size, encoded_type_highpart_pos, pcb); +below_weights_pos -= encoded_type_highpart_size; +} +} +else +{ +write_bits(scb.color_formats[0], 4, 13, pcb); +} + +// In dual-plane mode, encode the color component of the second plane of weights +if (is_dual_plane) +{ +write_bits(scb.plane2_component, 2, below_weights_pos - 2, pcb); +} + +// Encode the color components +uint8_t values_to_encode[32]; +int valuecount_to_encode = 0; + +const uint8_t* pack_table = color_uquant_to_scrambled_pquant_tables[scb.quant_mode - QUANT_6]; +for (unsigned int i = 0; i < scb.partition_count; i++) +{ +int vals = 2 * (scb.color_formats[i] >> 2) + 2; +assert(vals <= 8); +for (int j = 0; j < vals; j++) +{ +values_to_encode[j + valuecount_to_encode] = pack_table[scb.color_values[i][j]]; +} +valuecount_to_encode += vals; +} + +encode_ise(scb.get_color_quant_mode(), 
valuecount_to_encode, values_to_encode, pcb, +scb.partition_count == 1 ? 17 : 19 + PARTITION_INDEX_BITS); +} \ No newline at end of file From f6aa4a923da6b55b784061eb841f79dc254e686d Mon Sep 17 00:00:00 2001 From: Paschal Amusuo Date: Mon, 14 Jul 2025 22:14:00 +0000 Subject: [PATCH 12/12] Presubmit fixes --- agent_tests/agent_test.py | 58 ++++++++++++++++++---------- agent_tests/base_agent_test.py | 3 +- agent_tests/context_analyzer_test.py | 20 +++++----- 3 files changed, 49 insertions(+), 32 deletions(-) diff --git a/agent_tests/agent_test.py b/agent_tests/agent_test.py index 1fa8fb229..546151c84 100644 --- a/agent_tests/agent_test.py +++ b/agent_tests/agent_test.py @@ -35,7 +35,8 @@ NUM_ANA = int(os.getenv('LLM_NUM_ANA', '2')) agents = { - 'ContextAnalyzer': (context_analyzer.ContextAnalyzer, context_analyzer_test.ContextAnalyzerAgentTest), + 'ContextAnalyzer': (context_analyzer.ContextAnalyzer, + context_analyzer_test.ContextAnalyzerAgentTest), } @@ -62,12 +63,13 @@ def parse_args() -> argparse.Namespace: required=True, help='Comma-separated list of agent names for testing.') - parser.add_argument('-pf', - '--prompt-file', - type=str, - required=True, - default='', - help='A file containing the prompt to reconstruct for initial agent.') + parser.add_argument( + '-pf', + '--prompt-file', + type=str, + required=True, + default='', + help='A file containing the prompt to reconstruct for initial agent.') parser.add_argument('-mr', '--max-round', @@ -90,7 +92,8 @@ def parse_args() -> argparse.Namespace: parsed_args = parser.parse_args() - if not parsed_args.benchmark_yaml.endswith('.yaml') or not os.path.isfile(parsed_args.benchmark_yaml): + if not parsed_args.benchmark_yaml.endswith('.yaml') or not os.path.isfile( + parsed_args.benchmark_yaml): raise ValueError('Benchmark YAML file must be a valid .yaml file.') if not os.path.isfile(parsed_args.prompt_file): @@ -103,33 +106,42 @@ def parse_args() -> argparse.Namespace: return parsed_args -def 
get_test_pipeline(agents_text: str) -> List[Tuple[Type[base_agent.BaseAgent], Type[base_agent_test.BaseAgentTest]]]: + +def get_test_pipeline( + agents_text: str +) -> List[Tuple[Type[base_agent.BaseAgent], + Type[base_agent_test.BaseAgentTest]]]: """Returns a pipeline of agents for testing.""" agent_list = agents_text.strip().split(',') pipeline = [] for agent_name in agent_list: if agent_name not in agents: - raise ValueError(f'Agent {agent_name} is not defined in the agents dictionary.') + raise ValueError( + f'Agent {agent_name} is not defined in the agents dictionary.') pipeline.append(agents[agent_name]) if not pipeline: - raise ValueError('No agents found in the pipeline. Please provide a valid agent list.') + raise ValueError( + 'No agents found in the pipeline. Please provide a valid agent list.') return pipeline -def get_result_list_for_agent( - agent_class: Tuple[Type[base_agent.BaseAgent], Type[base_agent_test.BaseAgentTest]], - benchmark: benchmarklib.Benchmark, - prompt: str) -> List[Result]: + +def get_result_list_for_agent(agent_class: Tuple[ + Type[base_agent.BaseAgent], Type[base_agent_test.BaseAgentTest]], + benchmark: benchmarklib.Benchmark, + prompt: str) -> List[Result]: """Returns the initial result list for the agent.""" agent_test_class = agent_class[1] # Ensure agent_test_class is a subclass of BaseAgentTest if not issubclass(agent_test_class, base_agent_test.BaseAgentTest): - raise TypeError(f"{agent_test_class.__name__} is not a subclass of BaseAgentTest") + raise TypeError( + f"{agent_test_class.__name__} is not a subclass of BaseAgentTest") agent_test_instance = agent_test_class(args, trial=1) return agent_test_instance.setup_initial_result_list(benchmark, prompt) + def write_result(args: argparse.Namespace, trial: int, result: Result) -> None: """Writes the result to a file in the work directory.""" @@ -139,6 +151,7 @@ def write_result(args: argparse.Namespace, trial: int, result: Result) -> None: logger.info('Result written to %s', 
result_file, trial=trial) + if __name__ == '__main__': model = models.LLM.setup(ai_binary='', name='vertex_ai_gemini-2-5-pro-chat') @@ -152,8 +165,10 @@ def write_result(args: argparse.Namespace, trial: int, result: Result) -> None: # Initialize test benchmark benchmarks = benchmarklib.Benchmark.from_yaml(args.benchmark_yaml) - test_benchmark = [benchmark for benchmark in benchmarks - if benchmark.function_name == args.function_name] + test_benchmark = [ + benchmark for benchmark in benchmarks + if benchmark.function_name == args.function_name + ] if not test_benchmark: raise ValueError(f'No benchmark found for function {args.function_name}.') @@ -161,7 +176,8 @@ def write_result(args: argparse.Namespace, trial: int, result: Result) -> None: benchmark = test_benchmark[0] # Initialize the working directory - args.work_dirs = workdir.WorkDirs(os.path.join(args.work_dir, f'output-{benchmark.id}')) + args.work_dirs = workdir.WorkDirs( + os.path.join(args.work_dir, f'output-{benchmark.id}')) pipeline = get_test_pipeline(args.pipeline) @@ -187,5 +203,7 @@ def write_result(args: argparse.Namespace, trial: int, result: Result) -> None: write_result(args, trial, result) except Exception as e: - logger.error('An error occurred during the agent execution: %s', str(e), trial=trial) + logger.error('An error occurred during the agent execution: %s', + str(e), + trial=trial) logger.error('Traceback: %s', traceback.format_exc(), trial=trial) diff --git a/agent_tests/base_agent_test.py b/agent_tests/base_agent_test.py index 0f74bb095..ceec24668 100644 --- a/agent_tests/base_agent_test.py +++ b/agent_tests/base_agent_test.py @@ -50,4 +50,5 @@ def setup_initial_result_list(self, benchmark, prompt): """Sets up the initial result list for the agent test.""" # Load the benchmark and prompt file raise NotImplementedError( - "This method should be implemented in subclasses to set up the initial result list.") + "This method should be implemented in subclasses to set up the initial result 
list." + ) diff --git a/agent_tests/context_analyzer_test.py b/agent_tests/context_analyzer_test.py index 5df7b3bb9..4c2a4bca0 100644 --- a/agent_tests/context_analyzer_test.py +++ b/agent_tests/context_analyzer_test.py @@ -33,12 +33,12 @@ def setup_initial_result_list(self, benchmark, prompt): self.write_requirements_to_file(self.args, function_requirement) run_result = RunResult(benchmark=benchmark, - trial=self.trial, - work_dirs=self.args.work_dirs, - author=None, - chat_history={}, - crashes=True, - fuzz_target_source=fuzz_target_source) + trial=self.trial, + work_dirs=self.args.work_dirs, + author=None, + chat_history={}, + crashes=True, + fuzz_target_source=fuzz_target_source) crash_result = CrashResult( benchmark=benchmark, @@ -52,10 +52,8 @@ def setup_initial_result_list(self, benchmark, prompt): ) analysis_result = AnalysisResult(author=None, - run_result=run_result, - crash_result=crash_result, - chat_history={}) + run_result=run_result, + crash_result=crash_result, + chat_history={}) return [run_result, analysis_result] - -