Skip to content

Commit e46888b

Browse files
authored
Fix undefined reference (#465)
1. Add the header file of the function-under-test to the code generation prompt. 2. Add the header file or source file of the missing function to the code fixing prompt (mostly for C projects). 3. Add the related fixing instructions.
1 parent 6a4fe5c commit e46888b

File tree

5 files changed

+144
-18
lines changed

5 files changed

+144
-18
lines changed

data_prep/project_context/context_introspector.py

+57-7
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import logging
55
import os
66
from difflib import SequenceMatcher
7-
from typing import Any
7+
from typing import Any, Optional
88

99
from data_prep import introspector
1010
from experiment import benchmark as benchmarklib
@@ -165,12 +165,14 @@ def get_context_info(self) -> dict:
165165
func_source = self._get_function_implementation()
166166
files = self._get_files_to_include()
167167
decl = self._get_embeddable_declaration()
168+
header = self.get_prefixed_header_file()
168169

169170
context_info = {
170171
'xrefs': xrefs,
171172
'func_source': func_source,
172173
'files': files,
173-
'decl': decl
174+
'decl': decl,
175+
'header': header,
174176
}
175177

176178
logging.debug('Context: %s', context_info)
@@ -229,7 +231,7 @@ def get_same_header_file_paths(self, wrong_file: str) -> list[str]:
229231
for header in header_list:
230232
correct_file_name = os.path.splitext(os.path.basename(header))
231233
if wrong_file_name == correct_file_name:
232-
candidate_headers.append(header)
234+
candidate_headers.append(os.path.normpath(header))
233235

234236
return candidate_headers[:5]
235237

@@ -245,11 +247,23 @@ def get_similar_header_file_paths(self, wrong_file: str) -> list[str]:
245247
candidate_headers = sorted(candidate_header_scores,
246248
key=lambda x: candidate_header_scores[x],
247249
reverse=True)
248-
return candidate_headers[:5]
250+
return [os.path.normpath(header) for header in candidate_headers[:5]]
251+
252+
def _get_header_files_to_include(self, func_sig: str) -> Optional[str]:
253+
"""Retrieves the header file of the function signature."""
254+
header_file = introspector.query_introspector_header_files_to_include(
255+
self._benchmark.project, func_sig)
256+
return header_file[0] if header_file else None
249257

250-
def get_target_function_file_path(self) -> str:
258+
def _get_target_function_file_path(self) -> str:
251259
"""Retrieves the header/source file of the function under test."""
252-
# Step 1: Find a header file that shares the same name as the source file.
260+
# Step 1: Find a header file from the default API.
261+
header_file = self._get_header_files_to_include(
262+
self._benchmark.function_signature)
263+
if header_file:
264+
return header_file
265+
266+
# Step 2: Find a header file that shares the same name as the source file.
253267
# TODO: Make this more robust, e.g., when header file and base file do not
254268
# share the same basename.
255269
source_file = introspector.query_introspector_source_file_path(
@@ -264,5 +278,41 @@ def get_target_function_file_path(self) -> str:
264278
if candidate_headers:
265279
return candidate_headers[0]
266280

267-
# Step 2: Use the source file If it does not have a same-name-header.
281+
# Step 3: Use the source file if it does not have a same-name header.
268282
return source_file
283+
284+
def get_prefixed_header_file(self, func_sig: str = '') -> Optional[str]:
285+
"""Retrieves the header_file with `extern "C"` if needed."""
286+
if func_sig:
287+
header_file = self._get_header_files_to_include(func_sig)
288+
else:
289+
header_file = self._get_target_function_file_path()
290+
291+
if not header_file:
292+
return None
293+
include_statement = f'#include "{os.path.normpath(header_file)}"'
294+
return (f'extern "C" {{\n{include_statement}\n}}'
295+
if self._benchmark.needs_extern else include_statement)
296+
297+
def get_prefixed_header_file_by_name(self, func_name: str) -> Optional[str]:
298+
"""Retrieves the header file based on function name with `extern "C"` if
299+
needed."""
300+
func_sig = introspector.query_introspector_function_signature(
301+
self._benchmark.project, func_name)
302+
return self.get_prefixed_header_file(func_sig)
303+
304+
def get_prefixed_source_file(self,
305+
function_signature: str = '') -> Optional[str]:
306+
"""Retrieves the source file with `extern "C"` if needed."""
307+
if function_signature:
308+
source_file = introspector.query_introspector_source_file_path(
309+
self._benchmark.project, function_signature)
310+
else:
311+
source_file = introspector.query_introspector_source_file_path(
312+
self._benchmark.project, self._benchmark.function_signature)
313+
if not source_file:
314+
return None
315+
316+
include_statement = f'#include "{os.path.normpath(source_file)}"'
317+
return (f'extern "C" {{\n{include_statement}\n}}'
318+
if self._benchmark.needs_extern else include_statement)

experiment/benchmark.py

+21-2
Original file line numberDiff line numberDiff line change
@@ -184,11 +184,30 @@ def file_type(self) -> FileType:
184184
"""Returns the file type of the benchmark."""
185185
return get_file_type(self.target_path)
186186

187+
@property
188+
def is_c_target(self) -> bool:
189+
"""Checks if the fuzz target is written in C."""
190+
return self.file_type.value.lower() == 'c'
191+
192+
@property
193+
def is_cpp_target(self) -> bool:
194+
"""Checks if the fuzz target is written in C++."""
195+
return self.file_type.value.lower() == 'c++'
196+
197+
@property
198+
def is_c_projcet(self) -> bool:
199+
"""Validates if the project is written in C."""
200+
return self.language.lower() == 'c'
201+
202+
@property
203+
def is_cpp_projcet(self) -> bool:
204+
"""Validates if the project is written in C++."""
205+
return self.language.lower() == 'c++'
206+
187207
@property
188208
def needs_extern(self) -> bool:
189209
"""Checks if it is C++ fuzz target for a C project, which needs `extern`."""
190-
return (self.file_type.value.lower() == 'c++' and
191-
self.language.lower() == 'c')
210+
return self.is_cpp_target and self.is_c_projcet
192211

193212

194213
def get_file_type(file_path: str) -> FileType:

llm_toolkit/code_fixer.py

+61-2
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
ERROR_LINES = 20
3131
NO_MEMBER_ERROR_REGEX = r"error: no member named '.*' in '([^':]*):?.*'"
3232
FILE_NOT_FOUND_ERROR_REGEX = r"fatal error: '([^']*)' file not found"
33+
UNDEFINED_REF_ERROR_REGEX = r"undefined reference to `([^']*)'"
3334
UNKNOWN_TYPE_ERROR = 'error: unknown type name'
3435

3536
# The following strings identify errors when a C fuzz target attempts to use
@@ -461,6 +462,8 @@ def _collect_instructions(benchmark: benchmarklib.Benchmark, errors: list[str],
461462
for error in errors:
462463
instruction += _collect_instruction_file_not_found(benchmark, error,
463464
fuzz_target_source_code)
465+
instruction += _collect_instruction_undefined_reference(
466+
benchmark, error, fuzz_target_source_code)
464467
instruction += _collect_instruction_fdp_in_c_target(benchmark, errors,
465468
fuzz_target_source_code)
466469
instruction += _collect_instruction_no_goto(fuzz_target_source_code)
@@ -470,6 +473,62 @@ def _collect_instructions(benchmark: benchmarklib.Benchmark, errors: list[str],
470473
return instruction
471474

472475

476+
def _collect_instruction_undefined_reference(
477+
benchmark: benchmarklib.Benchmark, error: str,
478+
fuzz_target_source_code: str) -> str:
479+
"""Collects the instructions to fix the 'undefined reference' errors."""
480+
matched_funcs = re.findall(UNDEFINED_REF_ERROR_REGEX, error)
481+
if not matched_funcs:
482+
return ''
483+
instruction = ''
484+
for undefined_func in matched_funcs:
485+
if undefined_func == 'LLVMFuzzerTestOneInput':
486+
continue
487+
ci = context_introspector.ContextRetriever(benchmark)
488+
header_file = ci.get_prefixed_header_file_by_name(undefined_func)
489+
if header_file and header_file not in fuzz_target_source_code:
490+
instruction += (
491+
'You must add the following #include statement to fix the error of '
492+
f'<error>undefined reference to {undefined_func}</error>:\n<code>\n'
493+
f'{header_file}\n</code>.\n')
494+
elif not header_file and benchmark.is_c_projcet:
495+
instruction += (
496+
f'You must remove the function <code>{undefined_func}</code> from the'
497+
' generated fuzz target, because the function does not exist.\n')
498+
elif not header_file or header_file in fuzz_target_source_code:
499+
# Header file found but already included in the fuzz target, or
500+
# C++: Cannot map demangled C++ function name to signature
501+
source_file = ci.get_prefixed_source_file(undefined_func)
502+
if not source_file and benchmark.function_name in undefined_func:
503+
source_file = ci.get_prefixed_source_file()
504+
if source_file:
505+
if header_file:
506+
# To avoid redefinition.
507+
instruction += ('You must remove the following statement\n<code>\n'
508+
f'{header_file}</code>\n')
509+
instruction += (
510+
'You must add the following #include statement to fix the error of '
511+
f"<error>undefined reference to `{undefined_func}'</error>:\n"
512+
f'<code>\n{source_file}\n</code>.\n')
513+
else:
514+
instruction += (
515+
f"To fix <error>undefined reference to `{undefined_func}'</error>,"
516+
'check the library documentation (e.g. README.md, comments) for '
517+
'special instructions, such as required macros or specific inclusion '
518+
'methods. Ensure any necessary definitions or inclusions are '
519+
'correctly implemented in your generated fuzz target, following the '
520+
"library's guidance.")
521+
if not instruction:
522+
instruction += (
523+
f"To fix <error>undefined reference to `{undefined_func}'</error>,"
524+
'check the library documentation (e.g. README.md, comments) for '
525+
'special instructions, such as required macros or specific inclusion '
526+
'methods. Ensure any necessary definitions or inclusions are '
527+
'correctly implemented in your generated fuzz target, following the '
528+
"library's guidance.")
529+
return instruction
530+
531+
473532
def _collect_instruction_file_not_found(benchmark: benchmarklib.Benchmark,
474533
error: str,
475534
fuzz_target_source_code: str) -> str:
@@ -498,8 +557,8 @@ def _collect_instruction_file_not_found(benchmark: benchmarklib.Benchmark,
498557
return instruction
499558

500559
# Step 3: Suggest the header/source file of the function under test.
501-
function_file = ci.get_target_function_file_path()
502-
if f'#include "{function_file}"' in fuzz_target_source_code:
560+
function_file = ci.get_prefixed_header_file()
561+
if function_file and f'#include "{function_file}"' in fuzz_target_source_code:
503562
function_file_base_name = os.path.basename(function_file)
504563

505564
instruction += (

llm_toolkit/prompt_builder.py

+1
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ def format_context(self, context_info: dict) -> str:
198198
must_insert=context_info['decl'],
199199
func_source=context_info['func_source'],
200200
xrefs='\n'.join(context_info['xrefs']),
201+
include_statement=context_info['header'],
201202
)
202203

203204
def _select_examples(self, examples: list[list],

prompts/template_xml/context.txt

+4-7
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,11 @@
1-
{% if headers %}
2-
3-
The code block below contains the header files which you must include. The function requires these header files to successfully compile.
4-
If the headers are private, do not include them.
5-
Header files:
1+
{% if include_statement %}
2+
You must add the following #include statement in the fuzz target, its header file defines the function-under-test.
63
<code>
7-
{{ headers }}
4+
{{ include_statement }}
85
</code>
96
{% endif %}
10-
{% if must_insert %}
117

8+
{% if must_insert %}
129
You must insert code in the below code block before the function being tested:
1310
<code>
1411
{{ must_insert }}

0 commit comments

Comments
 (0)