Skip to content

Commit 787c97a

Browse files
unify logging (#487)
We use logging in multiple formats: - logging.info/debug/.. - print( - write to files It's fairly inconsistent. So am unifying it in a more Pythonic way. The format also makes it easier to track performance of the tool as we can find bottlenecks by looking at the logs. --------- Signed-off-by: David Korczynski <[email protected]>
1 parent e46888b commit 787c97a

17 files changed

+316
-241
lines changed

data_prep/introspector.py

+34-33
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@
3333
from experiment import benchmark as benchmarklib
3434
from experiment import oss_fuzz_checkout
3535

36+
logger = logging.getLogger(__name__)
37+
3638
T = TypeVar('T', str, list, dict, int) # Generic type.
3739

3840
TIMEOUT = 45
@@ -96,7 +98,6 @@ def set_introspector_endpoints(endpoint):
9698
INTROSPECTOR_FUNCTION_WITH_MATCHING_RETURN_TYPE
9799

98100
INTROSPECTOR_ENDPOINT = endpoint
99-
logging.info('Fuzz Introspector endpoint set to %s', INTROSPECTOR_ENDPOINT)
100101

101102
INTROSPECTOR_CFG = f'{INTROSPECTOR_ENDPOINT}/annotated-cfg'
102103
INTROSPECTOR_ORACLE_FAR_REACH = (
@@ -139,7 +140,7 @@ def _query_introspector(api: str, params: dict) -> Optional[requests.Response]:
139140
try:
140141
resp = requests.get(api, params, timeout=TIMEOUT)
141142
if not resp.ok:
142-
logging.error(
143+
logger.error(
143144
'Failed to get data from FI:\n'
144145
'%s\n'
145146
'-----------Response received------------\n'
@@ -150,19 +151,19 @@ def _query_introspector(api: str, params: dict) -> Optional[requests.Response]:
150151
return resp
151152
except requests.exceptions.Timeout as err:
152153
if attempt_num == MAX_RETRY:
153-
logging.error(
154+
logger.error(
154155
'Failed to get data from FI due to timeout, max retry exceeded:\n'
155156
'%s\n'
156157
'Error: %s', _construct_url(api, params), err)
157158
break
158159
delay = 5 * 2**attempt_num + random.randint(1, 10)
159-
logging.warning(
160+
logger.warning(
160161
'Failed to get data from FI due to timeout on attempt %d:\n'
161162
'%s\n'
162163
'retry in %ds...', attempt_num, _construct_url(api, params), delay)
163164
time.sleep(delay)
164165
except requests.exceptions.RequestException as err:
165-
logging.error(
166+
logger.error(
166167
'Failed to get data from FI due to unexpected error:\n'
167168
'%s\n'
168169
'Error: %s', _construct_url(api, params), err)
@@ -180,7 +181,7 @@ def _get_data(resp: Optional[requests.Response], key: str,
180181
try:
181182
data = resp.json()
182183
except requests.exceptions.InvalidJSONError:
183-
logging.error(
184+
logger.error(
184185
'Unable to parse response from FI:\n'
185186
'%s\n'
186187
'-----------Response received------------\n'
@@ -193,9 +194,9 @@ def _get_data(resp: Optional[requests.Response], key: str,
193194
if content:
194195
return content
195196

196-
logging.error('Failed to get %s from FI:\n'
197-
'%s\n'
198-
'%s', key, resp.url, data)
197+
logger.error('Failed to get %s from FI:\n'
198+
'%s\n'
199+
'%s', key, resp.url, data)
199200
return default_value
200201

201202

@@ -243,7 +244,7 @@ def query_introspector_for_targets(project, target_oracle) -> list[Dict]:
243244
"""Queries introspector for target functions."""
244245
query_func = get_oracle_dict().get(target_oracle, None)
245246
if not query_func:
246-
logging.error('No such oracle "%s"', target_oracle)
247+
logger.error('No such oracle "%s"', target_oracle)
247248
sys.exit(1)
248249
return query_func(project)
249250

@@ -453,7 +454,7 @@ def _get_raw_return_type(function: dict, project: str) -> str:
453454
"""Returns the raw function type."""
454455
return_type = function.get('return-type') or function.get('return_type', '')
455456
if not return_type:
456-
logging.error(
457+
logger.error(
457458
'Missing return type in project: %s\n'
458459
' raw_function_name: %s', project,
459460
get_raw_function_name(function, project))
@@ -475,8 +476,8 @@ def get_raw_function_name(function: dict, project: str) -> str:
475476
raw_name = (function.get('raw-function-name') or
476477
function.get('raw_function_name', ''))
477478
if not raw_name:
478-
logging.error('No raw function name in project: %s for function: %s',
479-
project, function)
479+
logger.error('No raw function name in project: %s for function: %s',
480+
project, function)
480481
return raw_name
481482

482483

@@ -485,7 +486,7 @@ def _get_clean_arg_types(function: dict, project: str) -> list[str]:
485486
raw_arg_types = (function.get('arg-types') or
486487
function.get('function_arguments', []))
487488
if not raw_arg_types:
488-
logging.error(
489+
logger.error(
489490
'Missing argument types in project: %s\n'
490491
' raw_function_name: %s', project,
491492
get_raw_function_name(function, project))
@@ -521,7 +522,7 @@ def _get_arg_names(function: dict, project: str, language: str) -> list[str]:
521522
arg_names = (function.get('arg-names') or
522523
function.get('function_argument_names', []))
523524
if not arg_names:
524-
logging.error(
525+
logger.error(
525526
'Missing argument names in project: %s\n'
526527
' raw_function_name: %s', project,
527528
get_raw_function_name(function, project))
@@ -535,7 +536,7 @@ def get_function_signature(function: dict, project: str) -> str:
535536
# For JVM projects, the full function signature are the raw function name
536537
return get_raw_function_name(function, project)
537538
if not function_signature:
538-
logging.error(
539+
logger.error(
539540
'Missing function signature in project: %s\n'
540541
' raw_function_name: %s', project,
541542
get_raw_function_name(function, project))
@@ -565,7 +566,7 @@ def _select_top_functions_from_oracle(project: str, limit: int,
565566
if target_oracle not in target_oracles:
566567
return OrderedDict()
567568

568-
logging.info('Extracting functions using oracle %s.', target_oracle)
569+
logger.info('Extracting functions using oracle %s.', target_oracle)
569570
functions = query_introspector_for_targets(project, target_oracle)[:limit]
570571

571572
return OrderedDict((func['function_signature'], func) for func in functions)
@@ -650,7 +651,7 @@ def populate_benchmarks_using_introspector(project: str, language: str,
650651
functions = _select_functions_from_oracles(project, limit, target_oracles)
651652

652653
if not functions:
653-
logging.error('No functions found using the oracles: %s', target_oracles)
654+
logger.error('No functions found using the oracles: %s', target_oracles)
654655
return []
655656

656657
if language == 'jvm':
@@ -671,13 +672,13 @@ def populate_benchmarks_using_introspector(project: str, language: str,
671672
harnesses, interesting = result
672673
harness = pick_one(harnesses)
673674
if not harness:
674-
logging.error('No fuzz target found in project %s.', project)
675+
logger.error('No fuzz target found in project %s.', project)
675676
return []
676-
logging.info('Fuzz target file found for project %s: %s', project, harness)
677+
logger.info('Fuzz target file found for project %s: %s', project, harness)
677678

678679
target_name = get_target_name(project, harness)
679-
logging.info('Fuzz target binary found for project %s: %s', project,
680-
target_name)
680+
logger.info('Fuzz target binary found for project %s: %s', project,
681+
target_name)
681682

682683
potential_benchmarks = []
683684
for function in functions:
@@ -699,17 +700,17 @@ def populate_benchmarks_using_introspector(project: str, language: str,
699700
# stored as <SOURCE_BASE>/a/b/c/d.java
700701
src_file = f'{filename.replace(".", "/")}.java'
701702
if src_file not in src_path_list:
702-
logging.error('error: %s %s', filename, interesting.keys())
703+
logger.error('error: %s %s', filename, interesting.keys())
703704
continue
704705
elif filename not in [os.path.basename(i) for i in interesting.keys()]:
705706
# TODO: Bazel messes up paths to include "/proc/self/cwd/..."
706-
logging.error('error: %s %s', filename, interesting.keys())
707+
logger.error('error: %s %s', filename, interesting.keys())
707708
continue
708709

709710
function_signature = get_function_signature(function, project)
710711
if not function_signature:
711712
continue
712-
logging.info('Function signature to fuzz: %s', function_signature)
713+
logger.info('Function signature to fuzz: %s', function_signature)
713714
potential_benchmarks.append(
714715
benchmarklib.Benchmark('cli',
715716
project,
@@ -728,7 +729,7 @@ def populate_benchmarks_using_introspector(project: str, language: str,
728729

729730
if len(potential_benchmarks) >= (limit * len(target_oracles)):
730731
break
731-
print("Length of potential targets: %d" % (len(potential_benchmarks)))
732+
logger.info("Length of potential targets: %d" % (len(potential_benchmarks)))
732733

733734
return potential_benchmarks
734735

@@ -761,7 +762,7 @@ def _identify_latest_report(project_name: str):
761762
if summaries:
762763
return ('https://storage.googleapis.com/oss-fuzz-introspector/'
763764
f'{summaries[-1]}')
764-
logging.error('Error: %s has no summary.', project_name)
765+
logger.error('Error: %s has no summary.', project_name)
765766
return None
766767

767768

@@ -799,14 +800,14 @@ def get_project_funcs(project_name: str) -> Dict[str, List[Dict]]:
799800
from FuzzIntrospector."""
800801
introspector_json_report = _extract_introspector_report(project_name)
801802
if introspector_json_report is None:
802-
logging.error('No fuzz introspector report is found.')
803+
logger.error('No fuzz introspector report is found.')
803804
return {}
804805

805806
if introspector_json_report.get('analyses') is None:
806-
logging.error('Error: introspector_json_report has no "analyses"')
807+
logger.error('Error: introspector_json_report has no "analyses"')
807808
return {}
808809
if introspector_json_report.get('analyses').get('AnnotatedCFG') is None:
809-
logging.error(
810+
logger.error(
810811
'Error: introspector_json_report["analyses"] has no "AnnotatedCFG"')
811812
return {}
812813

@@ -885,8 +886,8 @@ def _parse_arguments() -> argparse.Namespace:
885886
oss_fuzz_checkout.clone_oss_fuzz()
886887
oss_fuzz_checkout.postprocess_oss_fuzz()
887888
except subprocess.CalledProcessError as e:
888-
logging.error('Failed to prepare OSS-Fuzz directory for project %s: %s',
889-
args.project, e)
889+
logger.error('Failed to prepare OSS-Fuzz directory for project %s: %s',
890+
args.project, e)
890891
cur_project_language = oss_fuzz_checkout.get_project_language(args.project)
891892
benchmarks = populate_benchmarks_using_introspector(args.project,
892893
cur_project_language,
@@ -895,5 +896,5 @@ def _parse_arguments() -> argparse.Namespace:
895896
if benchmarks:
896897
benchmarklib.Benchmark.to_yaml(benchmarks, args.out)
897898
else:
898-
logging.error('Nothing found for %s', args.project)
899+
logger.error('Nothing found for %s', args.project)
899900
sys.exit(1)

data_prep/parse_training_data.py

+17-15
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,8 @@
3434

3535
from google.cloud import storage
3636

37+
logger = logging.getLogger(__name__)
38+
3739
STORAGE_CLIENT = storage.Client()
3840
FUZZ_TARGET_FIXING_DIR_PATTERN = r'\d+-F\d+'
3941

@@ -50,7 +52,7 @@ def prompt(self) -> str:
5052
"""Returns the prompt used by the benchmark."""
5153
prompt_path = os.path.join(self.benchmark_dir, 'prompt.txt')
5254
if not os.path.isfile(prompt_path):
53-
logging.warning('Prompt does not exist: %s', prompt_path)
55+
logger.warning('Prompt does not exist: %s', prompt_path)
5456
return ''
5557
with open(prompt_path) as prompt_file:
5658
return prompt_file.read()
@@ -70,7 +72,7 @@ def targets(self) -> Dict[str, List[str]]:
7072
all_targets = {}
7173
raw_target_dir = os.path.join(self.benchmark_dir, 'raw_targets')
7274
if not os.path.isdir(raw_target_dir):
73-
logging.warning('Raw target dir does not exist: %s', raw_target_dir)
75+
logger.warning('Raw target dir does not exist: %s', raw_target_dir)
7476
return {}
7577
raw_targets = [
7678
instance for instance in os.listdir(raw_target_dir)
@@ -83,7 +85,7 @@ def targets(self) -> Dict[str, List[str]]:
8385

8486
fixed_target_dir = os.path.join(self.benchmark_dir, 'fixed_targets')
8587
if not os.path.isdir(fixed_target_dir):
86-
logging.warning('Fixed target dir does not exist: %s', fixed_target_dir)
88+
logger.warning('Fixed target dir does not exist: %s', fixed_target_dir)
8789
return {}
8890
fix_dirs = self._get_code_fixing_dirs(fixed_target_dir)
8991
for fix_dir in sorted(fix_dirs):
@@ -96,8 +98,8 @@ def targets(self) -> Dict[str, List[str]]:
9698
with open(code_path) as code_file:
9799
fixed_code = code_file.read()
98100
if not all_targets.get(instance):
99-
logging.warning('Benchmark instance does not exist: %s - %s',
100-
self.benchmark_dir, instance)
101+
logger.warning('Benchmark instance does not exist: %s - %s',
102+
self.benchmark_dir, instance)
101103
continue
102104
all_targets[instance].append(fixed_code)
103105
return all_targets
@@ -109,20 +111,20 @@ def status(self) -> Dict[str, Dict[str, Any]]:
109111
all_status = {}
110112
status_dir = os.path.join(self.benchmark_dir, 'status')
111113
if not os.path.isdir(status_dir):
112-
logging.warning('Status dir does not exist: %s', status_dir)
114+
logger.warning('Status dir does not exist: %s', status_dir)
113115
return {}
114116
for instance in os.listdir(status_dir):
115117
status_json_path = os.path.join(status_dir, instance, 'result.json')
116118
if not os.path.isfile(status_json_path):
117-
logging.info('Missing result JSON of benchmark instance: %s - %s',
118-
self.benchmark, instance)
119+
logger.info('Missing result JSON of benchmark instance: %s - %s',
120+
self.benchmark, instance)
119121
continue
120122
with open(status_json_path) as file:
121123
try:
122124
all_status[instance] = json.load(file)
123125
except Exception as e:
124-
logging.warning(e)
125-
logging.warning(status_json_path)
126+
logger.warning(e)
127+
logger.warning(status_json_path)
126128

127129
return all_status
128130

@@ -200,7 +202,7 @@ def save_json(self, coverage: bool, group: bool, save_dir: str):
200202
data_filapath = os.path.join(save_dir, data_filename)
201203
with open(data_filapath, 'w') as file:
202204
json.dump(data, file, indent=4)
203-
logging.info('Saved to: %s', data_filapath)
205+
logger.info('Saved to: %s', data_filapath)
204206

205207

206208
class Experiment:
@@ -240,7 +242,7 @@ def save_json(self, coverage: bool, group: bool, save_dir: str) -> None:
240242
data_filapath = os.path.join(save_dir, data_filename)
241243
with open(data_filapath, 'w') as file:
242244
json.dump(data, file, indent=4)
243-
logging.info('Saved to: %s', data_filapath)
245+
logger.info('Saved to: %s', data_filapath)
244246

245247

246248
def _parse_gcs_uri(bucket_uri: str) -> tuple[str, str]:
@@ -262,7 +264,7 @@ def _download_files(experiment_dir: str, bucket_uri: str) -> None:
262264
blobs = bucket.list_blobs(prefix=directory_prefix)
263265
with ThreadPoolExecutor(max_workers=40) as executor:
264266
for i, blob in enumerate(blobs):
265-
print(f'{i} / {blobs_num}')
267+
logger.info(f'{i} / {blobs_num}')
266268
executor.submit(_download_file, blob, experiment_dir)
267269

268270

@@ -271,7 +273,7 @@ def _download_file(file_blob: storage.Blob, local_dir: str) -> None:
271273
Downloads a file from |file_blob| and preserve its path after |bucket_dir|.
272274
"""
273275
if not file_blob.name:
274-
logging.warning('Blob has no name: %s', file_blob)
276+
logger.warning('Blob has no name: %s', file_blob)
275277
return
276278
if any(
277279
file_blob.name.endswith(suffix)
@@ -357,7 +359,7 @@ def main() -> int:
357359
if args.benchmark_dir:
358360
result = Benchmark(args.benchmark_dir)
359361
if not result.is_valid_benchmark:
360-
logging.info(
362+
logger.info(
361363
'Invalid benchmark directory provided, missing necessary file.')
362364
elif args.experiment_dir:
363365
result = Experiment(args.experiment_dir, args.bucket_uri)

data_prep/project_context/context_introspector.py

+2
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
from data_prep import introspector
1010
from experiment import benchmark as benchmarklib
1111

12+
logger = logging.getLogger(__name__)
13+
1214
COMPLEX_TYPES = ['const', 'enum', 'struct', 'union', 'volatile']
1315

1416

0 commit comments

Comments
 (0)