Skip to content

Commit 6405e02

Browse files
authored
Merge branch 'main' into exp-972
2 parents ff34da0 + bd7a866 commit 6405e02

26 files changed

+829
-252
lines changed

.github/workflows/osv-scanner-scheduled-push.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,5 +32,4 @@ jobs:
3232
# Example of specifying custom arguments
3333
scan-args: |-
3434
-r
35-
--skip-git
3635
./

agent/coverage_analyzer.py

Lines changed: 125 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,132 @@
1414
"""An LLM agent to analyze and provide insight of a fuzz target's low coverage.
1515
Use it as a usual module locally, or as script in cloud builds.
1616
"""
17+
import os
18+
from typing import Optional
19+
20+
import logger
1721
from agent.base_agent import BaseAgent
22+
from experiment.workdir import WorkDirs
23+
from llm_toolkit import prompt_builder
24+
from llm_toolkit.prompt_builder import CoverageAnalyzerTemplateBuilder
25+
from llm_toolkit.prompts import Prompt
26+
from results import AnalysisResult, CoverageResult, Result, RunResult
27+
from tool.container_tool import ProjectContainerTool
28+
29+
INVALID_PRMOT_PATH = os.path.join('prompts', 'agent',
30+
'coverage-analyzer-invalid-response.txt')
1831

1932

2033
class CoverageAnalyzer(BaseAgent):
  """The Agent to analyze why a fuzz target reaches low coverage.

  It chats with the LLM while letting it run bash commands in an inspection
  container, and records the LLM's conclusion, insights and suggestions in a
  CoverageResult.
  """

  def _initial_prompt(self, results: list[Result]) -> Prompt:
    """Constructs initial prompt of the agent.

    Args:
      results: The result history; only the last entry is used.

    Returns:
      The prompt built by CoverageAnalyzerTemplateBuilder, or an empty Prompt
      when the last result is not a RunResult.
    """
    last_result = results[-1]
    benchmark = last_result.benchmark

    if not isinstance(last_result, RunResult):
      logger.error('The last result in %s is not RunResult: %s',
                   self.name,
                   results,
                   trial=self.trial)
      return Prompt()

    builder = CoverageAnalyzerTemplateBuilder(self.llm, benchmark, last_result)
    prompt = builder.build(example_pair=[],
                           tool_guides=self.inspect_tool.tutorial(),
                           project_dir=self.inspect_tool.project_dir)
    # TODO: A different file name/dir.
    prompt.save(self.args.work_dirs.prompt)

    return prompt

  def _container_handle_conclusion(self, cur_round: int, response: str,
                                   coverage_result: CoverageResult,
                                   prompt: Prompt) -> Optional[Prompt]:
    """Records the LLM's conclusion, insights and suggestions, if any.

    Args:
      cur_round: The current chat round, used for logging only.
      response: The raw LLM response to parse.
      coverage_result: Updated in place when a conclusion tag is found.
      prompt: The prompt to hand back when there is no conclusion.

    Returns:
      None when a conclusion was found and recorded; otherwise |prompt|
      unchanged so that the caller can keep processing the response.
    """
    conclusion = self._parse_tag(response, 'conclusion')
    if not conclusion:
      return prompt
    logger.info('----- ROUND %02d Received conclusion -----',
                cur_round,
                trial=self.trial)

    # Anything other than a (case-insensitive) 'true' means no improvement
    # is required.
    coverage_result.improve_required = conclusion.strip().lower() == 'true'
    coverage_result.insight = self._parse_tag(response, 'insights')
    coverage_result.suggestions = self._parse_tag(response, 'suggestions')

    return None

  def _container_tool_reaction(
      self, cur_round: int, response: str, run_result: RunResult,
      coverage_result: CoverageResult) -> Optional[Prompt]:
    """Validates LLM conclusion or executes its command.

    Args:
      cur_round: The current chat round.
      response: The raw LLM response.
      run_result: Unused; kept for signature compatibility.
      coverage_result: Updated in place when the LLM reports a conclusion.

    Returns:
      The next prompt to send to the LLM, or None when the conclusion has
      been recorded and the chat can stop.
    """
    del run_result
    prompt = prompt_builder.DefaultTemplateBuilder(self.llm, None).build([])

    prompt = self._container_handle_bash_commands(response, self.inspect_tool,
                                                  prompt)
    # Only report conclusion when no more bash investigation is required.
    if not prompt.gettext():
      prompt = self._container_handle_conclusion(cur_round, response,
                                                 coverage_result, prompt)
      if prompt is None:
        # Succeeded.
        return None

    # Finally check invalid responses.
    if not response or not prompt.get():
      prompt = self._container_handle_invalid_tool_usage(
          self.inspect_tool, cur_round, response, prompt)
      # NOTE: The path is relative, so it is resolved against the current
      # working directory of the process.
      with open(INVALID_PRMOT_PATH, 'r', encoding='utf-8') as prompt_file:
        prompt.append(prompt_file.read())

    return prompt

  def execute(self, result_history: list[Result]) -> AnalysisResult:
    """Executes the agent to analyze the root cause of the low coverage.

    Args:
      result_history: The result history; its last entry must be a RunResult.

    Returns:
      An AnalysisResult wrapping the last RunResult and the CoverageResult
      filled in during the chat.
    """
    WorkDirs(self.args.work_dirs.base, keep=True)
    last_result = result_history[-1]
    assert isinstance(last_result, RunResult)

    logger.info('Executing %s', self.name, trial=last_result.trial)
    benchmark = last_result.benchmark
    coverage_result = CoverageResult()
    # TODO(dongge): Use the generated fuzz target and build script here.
    self.inspect_tool = ProjectContainerTool(benchmark, name='inspect')

    # Everything that touches the container runs inside the try block so that
    # the container is always stopped, even when setup or compilation fails.
    try:
      self.inspect_tool.write_to_file(content=last_result.fuzz_target_source,
                                      file_path=benchmark.target_path)
      if last_result.build_script_source:
        self.inspect_tool.write_to_file(
            content=last_result.build_script_source,
            file_path=self.inspect_tool.build_script_path)
      self.inspect_tool.compile(extra_commands=' && rm -rf /out/* > /dev/null')

      cur_round = 1
      prompt = self._initial_prompt(result_history)
      client = self.llm.get_chat_client(model=self.llm.get_model())
      while prompt and cur_round < self.max_round:
        response = self.chat_llm(cur_round,
                                 client=client,
                                 prompt=prompt,
                                 trial=last_result.trial)
        prompt = self._container_tool_reaction(cur_round, response, last_result,
                                               coverage_result)
        cur_round += 1
    finally:
      # Cleanup: stop and remove the container
      logger.debug('Stopping and removing the inspect container %s',
                   self.inspect_tool.container_id,
                   trial=last_result.trial)
      self.inspect_tool.terminate()

    analysis_result = AnalysisResult(
        author=self,
        run_result=last_result,
        coverage_result=coverage_result,
        chat_history={self.name: coverage_result.to_dict()})
    return analysis_result

agent/enhancer.py

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,10 @@
1616
"""
1717
import logger
1818
from agent.prototyper import Prototyper
19-
from llm_toolkit.prompt_builder import EnhancerTemplateBuilder, JvmFixingBuilder
20-
from llm_toolkit.prompts import Prompt
19+
from llm_toolkit.prompt_builder import (CoverageEnhancerTemplateBuilder,
20+
EnhancerTemplateBuilder,
21+
JvmFixingBuilder)
22+
from llm_toolkit.prompts import Prompt, TextPrompt
2123
from results import AnalysisResult, BuildResult, Result
2224

2325

@@ -52,9 +54,27 @@ def _initial_prompt(self, results: list[Result]) -> Prompt:
5254
last_result.run_result.fuzz_target_source, [])
5355
prompt = builder.build([], None, None)
5456
else:
55-
error_desc, errors = last_result.semantic_result.get_error_info()
56-
builder = EnhancerTemplateBuilder(self.llm, benchmark, last_build_result,
57-
error_desc, errors)
57+
# TODO(dongge): Refine this logic.
58+
if last_result.semantic_result:
59+
error_desc, errors = last_result.semantic_result.get_error_info()
60+
builder = EnhancerTemplateBuilder(self.llm, benchmark,
61+
last_build_result, error_desc, errors)
62+
elif last_result.coverage_result:
63+
builder = CoverageEnhancerTemplateBuilder(
64+
self.llm,
65+
benchmark,
66+
last_build_result,
67+
coverage_result=last_result.coverage_result)
68+
else:
69+
logger.error(
70+
'Last result does not contain either semantic result or '
71+
'coverage result',
72+
trial=self.trial)
73+
# TODO(dongge): Give some default initial prompt.
74+
prompt = TextPrompt(
75+
'Last result does not contain either semantic result or '
76+
'coverage result')
77+
return prompt
5878
prompt = builder.build(example_pair=[],
5979
tool_guides=self.inspect_tool.tutorial(),
6080
project_dir=self.inspect_tool.project_dir)

agent/one_prompt_enhancer.py

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -42,14 +42,25 @@ def _initial_prompt(self, results: list[Result]) -> Prompt:
4242
last_result.run_result.fuzz_target_source, [])
4343
prompt = builder.build([], None, None)
4444
else:
45-
error_desc, errors = last_result.semantic_result.get_error_info()
45+
# TODO(dongge): Refine this logic.
4646
builder = DefaultTemplateBuilder(self.llm)
47-
prompt = builder.build_fixer_prompt(benchmark,
48-
last_result.fuzz_target_source,
49-
error_desc,
50-
errors,
51-
context='',
52-
instruction='')
47+
if last_result.semantic_result:
48+
error_desc, errors = last_result.semantic_result.get_error_info()
49+
prompt = builder.build_fixer_prompt(benchmark,
50+
last_result.fuzz_target_source,
51+
error_desc,
52+
errors,
53+
context='',
54+
instruction='')
55+
else:
56+
prompt = builder.build_fixer_prompt(
57+
benchmark=benchmark,
58+
raw_code=last_result.fuzz_target_source,
59+
error_desc='',
60+
errors=[],
61+
coverage_result=last_result.coverage_result,
62+
context='',
63+
instruction='')
5364
# TODO: A different file name/dir.
5465
prompt.save(self.args.work_dirs.prompt)
5566

agent/one_prompt_prototyper.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,8 +150,11 @@ def _advice_fuzz_target(self, build_result: BuildResult,
150150
instruction = code_fixer.collect_instructions(
151151
build_result.benchmark, errors, build_result.fuzz_target_source)
152152
prompt = builder.build_fixer_prompt(build_result.benchmark,
153-
build_result.fuzz_target_source, '',
154-
errors, context, instruction)
153+
build_result.fuzz_target_source,
154+
'',
155+
errors,
156+
context=context,
157+
instruction=instruction)
155158

156159
return prompt
157160

agent/prototyper.py

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -154,18 +154,12 @@ def _validate_fuzz_target_and_build_script_via_compile(
154154
compilation_tool = ProjectContainerTool(benchmark=benchmark)
155155

156156
# Replace fuzz target and build script in the container.
157-
replace_file_content_command = (
158-
'cat << "OFG_EOF" > {file_path}\n{file_content}\nOFG_EOF')
159-
compilation_tool.execute(
160-
replace_file_content_command.format(
161-
file_path=benchmark.target_path,
162-
file_content=build_result.fuzz_target_source))
163-
157+
compilation_tool.write_to_file(content=build_result.fuzz_target_source,
158+
file_path=benchmark.target_path)
164159
if build_result.build_script_source:
165-
compilation_tool.execute(
166-
replace_file_content_command.format(
167-
file_path='/src/build.sh',
168-
file_content=build_result.build_script_source))
160+
compilation_tool.write_to_file(
161+
content=build_result.build_script_source,
162+
file_path=compilation_tool.build_script_path)
169163

170164
# Recompile.
171165
logger.info('===== ROUND %02d Recompile =====',

agent/semantic_analyzer.py

Lines changed: 5 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
"""An agent to analyze a fuzz target's run log for semantic errors.
1515
Use it as a usual module locally, or as script in cloud builds.
1616
"""
17-
import os
1817
import re
1918
from collections import defaultdict, namedtuple
2019
from typing import Optional
@@ -61,11 +60,8 @@ def execute(self, result_history: list[Result]) -> AnalysisResult:
6160
last_result = result_history[-1]
6261
assert isinstance(last_result, RunResult)
6362

64-
with open(
65-
os.path.join(last_result.work_dirs.run_logs, f'{self.trial:02}.log'),
66-
'rb') as fuzzer_log:
67-
_, _, _, _, semantic_result = self._parse_libfuzzer_logs(
68-
fuzzer_log, last_result.benchmark.project)
63+
_, _, _, _, semantic_result = self._parse_libfuzzer_logs(
64+
last_result.run_log, last_result.benchmark.project)
6965

7066
analysis_result = AnalysisResult(
7167
author=self,
@@ -75,24 +71,13 @@ def execute(self, result_history: list[Result]) -> AnalysisResult:
7571
return analysis_result
7672

7773
def _parse_libfuzzer_logs(self,
78-
log_handle,
74+
fuzzlog,
7975
project_name: str,
8076
check_cov_increase: bool = True) -> ParseResult:
8177
"""Parses libFuzzer logs."""
8278
lines = None
83-
try:
84-
fuzzlog = log_handle.read(-1)
85-
# Some crashes can mess up the libfuzzer output and raise decode error.
86-
fuzzlog = fuzzlog.decode('utf-8', errors='ignore')
87-
lines = fuzzlog.split('\n')
88-
except MemoryError as e:
89-
# Some logs from abnormal fuzz targets are too large to be parsed.
90-
logger.error('%s is too large to parse: %s',
91-
log_handle.name,
92-
e,
93-
trial=self.trial)
94-
return ParseResult(0, 0, False, '',
95-
SemanticCheckResult(SemanticCheckResult.LOG_MESS_UP))
79+
# The log is passed in as text (no decoding here); split it into lines.
80+
lines = fuzzlog.split('\n')
9681

9782
cov_pcs, total_pcs, crashes = 0, 0, False
9883

common/cloud_builder.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ class CloudBuilder:
5353

5454
def __init__(self, args: argparse.Namespace) -> None:
5555
self.tags = ['ofg', 'agent', args.cloud_experiment_name]
56+
self.exp_args = args
5657
self.credentials, self.project_id = default()
5758
assert self.project_id, 'Cloud experiment requires a Google cloud project.'
5859
assert hasattr(
@@ -96,17 +97,21 @@ def _upload_to_gcs(self, local_file_path: str) -> str:
9697
logging.info('Uploaded %s to %s', local_file_path, bucket_file_url)
9798
return bucket_file_url
9899

99-
def _prepare_and_upload_archive(self, result_history: list[Result]) -> str:
  """Archives and uploads local OFG repo to cloud build.

  The archive contains every git-tracked file that exists under OFG_ROOT_DIR
  plus every file under the work directory of the last result.

  Args:
    result_history: The result history; the work directory of its last entry
      is included in the archive.

  Returns:
    The value returned by self._upload_files for the new archive.
  """
  # `git ls-files` reports paths relative to OFG_ROOT_DIR, so the walked paths
  # must be relative to the same directory (not to the current working
  # directory) for the intersection below to be meaningful.
  dir_files = {
      os.path.relpath(os.path.join(root, file), OFG_ROOT_DIR)
      for root, _, files in os.walk(OFG_ROOT_DIR)
      for file in files
  }
  git_files = set(
      subprocess.check_output(['git', 'ls-files'],
                              cwd=OFG_ROOT_DIR,
                              text=True).splitlines())
  # NOTE(review): assumes _upload_files resolves these paths against
  # OFG_ROOT_DIR, which is passed as its second argument -- confirm.
  result_files = {
      os.path.relpath(os.path.join(root, file), OFG_ROOT_DIR)
      for root, _, files in os.walk(result_history[-1].work_dirs.base)
      for file in files
  }
  file_to_upload = list((dir_files & git_files) | result_files)

  return self._upload_files(f'ofg-repo-{uuid.uuid4().hex}.tar.gz',
                            OFG_ROOT_DIR, file_to_upload)
@@ -363,7 +368,7 @@ def run(self, agent: BaseAgent, result_history: list[Result],
363368
self.tags += [
364369
str(agent),
365370
str(result_history[-1].benchmark.project),
366-
# TODO(dongge): A tag for function name, compatible with tag format.
371+
str(result_history[-1].benchmark.function_name),
367372
str(result_history[-1].trial)
368373
]
369374
# Step1: Generate dill files.
@@ -374,7 +379,7 @@ def run(self, agent: BaseAgent, result_history: list[Result],
374379
# TODO(dongge): Encrypt dill files?
375380

376381
# Step 2: Upload OFG repo and dill files to GCS.
377-
ofg_url = self._prepare_and_upload_archive()
382+
ofg_url = self._prepare_and_upload_archive(result_history)
378383
agent_url = self._upload_to_gcs(agent_dill)
379384
results_url = self._upload_to_gcs(results_dill)
380385
oss_fuzz_data_url = self._upload_oss_fuzz_data()

0 commit comments

Comments
 (0)