Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions common/experiment_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,12 @@ def get_oss_fuzz_corpora_filestore_path():
return posixpath.join(get_experiment_filestore_path(), 'oss_fuzz_corpora')


def get_custom_seed_corpora_filestore_path():
    """Returns the filestore path holding user-provided seed corpora."""
    # The custom corpora live in a fixed sub-directory of the experiment's
    # filestore directory.
    experiment_filestore_path = get_experiment_filestore_path()
    return posixpath.join(experiment_filestore_path, 'custom_seed_corpora')


def get_dispatcher_instance_name(experiment: str) -> str:
    """Returns the name of the dispatcher instance for |experiment|.

    The name is the experiment name prefixed with "d-".
    """
    return f'd-{experiment}'
Expand Down
1 change: 1 addition & 0 deletions experiment/resources/runner-startup-script-template.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ docker run \
-e NO_SEEDS={{no_seeds}} \
-e NO_DICTIONARIES={{no_dictionaries}} \
-e OSS_FUZZ_CORPUS={{oss_fuzz_corpus}} \
-e CUSTOM_SEED_CORPUS={{custom_seed_corpus_dir}} \
-e DOCKER_REGISTRY={{docker_registry}} {% if not local_experiment %}-e CLOUD_PROJECT={{cloud_project}} -e CLOUD_COMPUTE_ZONE={{cloud_compute_zone}} {% endif %}\
-e EXPERIMENT_FILESTORE={{experiment_filestore}} {% if local_experiment %}-v {{experiment_filestore}}:{{experiment_filestore}} {% endif %}\
-e REPORT_FILESTORE={{report_filestore}} {% if local_experiment %}-v {{report_filestore}}:{{report_filestore}} {% endif %}\
Expand Down
80 changes: 78 additions & 2 deletions experiment/run_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import sys
import tarfile
import tempfile
import zipfile
from typing import Dict, List

import jinja2
Expand Down Expand Up @@ -63,6 +64,9 @@
'gs://{project}-backup.clusterfuzz-external.appspot.com/corpus/'
'libFuzzer/{fuzz_target}/public.zip')

# Maximum size, in bytes (1 MiB), allowed for a single seed corpus file. The
# limit exists for AFL. Custom seed corpus files larger than this are skipped
# when the corpus is validated and packed.
CORPUS_ELEMENT_BYTES_LIMIT = 1 * 1024 * 1024


def read_and_validate_experiment_config(config_filename: str) -> Dict:
"""Reads |config_filename|, validates it, finds as many errors as possible,
Expand Down Expand Up @@ -148,6 +152,48 @@ def get_directories(parent_dir):
]


# pylint: disable=too-many-locals
def validate_and_pack_custom_seed_corpus(custom_seed_corpus_dir, benchmarks):
    """Validates the user-provided seed corpus and archives it per benchmark.

    |custom_seed_corpus_dir| must contain one non-empty sub-directory per
    benchmark in |benchmarks|. For each benchmark, every non-empty regular
    file that is at most CORPUS_ELEMENT_BYTES_LIMIT bytes is written to
    |custom_seed_corpus_dir|/<benchmark>.zip.

    Args:
        custom_seed_corpus_dir: Local directory containing the seed corpora.
        benchmarks: Names of the benchmarks that need a seed corpus.

    Raises:
        ValidationError: If |custom_seed_corpus_dir| is not a directory, or if
            a benchmark's corpus directory is missing, is not a directory, is
            empty or contains no usable seed file.
    """
    if not os.path.isdir(custom_seed_corpus_dir):
        raise ValidationError('Corpus location "%s" is invalid.' %
                              custom_seed_corpus_dir)

    for benchmark in benchmarks:
        benchmark_corpus_dir = os.path.join(custom_seed_corpus_dir, benchmark)
        if not os.path.exists(benchmark_corpus_dir):
            raise ValidationError('Custom seed corpus directory for '
                                  'benchmark "%s" does not exist.' % benchmark)
        if not os.path.isdir(benchmark_corpus_dir):
            raise ValidationError('Seed corpus of benchmark "%s" must be '
                                  'a directory.' % benchmark)
        if not os.listdir(benchmark_corpus_dir):
            raise ValidationError('Seed corpus of benchmark "%s" is empty.' %
                                  benchmark)

        valid_corpus_files = []
        for root, _, files in os.walk(benchmark_corpus_dir):
            for filename in files:
                file_path = os.path.join(root, filename)
                # os.walk also lists broken symlinks and special files;
                # getsize would raise on those, so only keep regular files.
                if not os.path.isfile(file_path):
                    continue

                file_size = os.path.getsize(file_path)
                # Empty and oversized files are silently left out.
                if 0 < file_size <= CORPUS_ELEMENT_BYTES_LIMIT:
                    valid_corpus_files.append(file_path)

        if not valid_corpus_files:
            raise ValidationError('No valid corpus files for "%s"' % benchmark)

        # Sort so that the archive's member order is reproducible.
        valid_corpus_files.sort()

        # The archive is written next to (not inside) the benchmark's corpus
        # directory.
        benchmark_corpus_archive_path = os.path.join(custom_seed_corpus_dir,
                                                     f'{benchmark}.zip')
        with zipfile.ZipFile(benchmark_corpus_archive_path, 'w') as archive:
            for file_path in valid_corpus_files:
                # Name members relative to the corpus root so that files with
                # the same name in different sub-directories cannot collide.
                # Files directly under the benchmark directory are still
                # stored as "<benchmark>/<filename>".
                archive.write(
                    file_path,
                    os.path.relpath(file_path, custom_seed_corpus_dir))


def validate_benchmarks(benchmarks: List[str]):
"""Parses and validates list of benchmarks."""
benchmark_types = set()
Expand Down Expand Up @@ -219,7 +265,8 @@ def start_experiment( # pylint: disable=too-many-arguments
allow_uncommitted_changes=False,
concurrent_builds=None,
measurers_cpus=None,
runners_cpus=None):
runners_cpus=None,
custom_seed_corpus_dir=None):
"""Start a fuzzer benchmarking experiment."""
if not allow_uncommitted_changes:
check_no_uncommitted_changes()
Expand Down Expand Up @@ -248,6 +295,12 @@ def start_experiment( # pylint: disable=too-many-arguments
# 12GB is just the amount that KLEE needs, use this default to make KLEE
# experiments easier to run.
config['runner_memory'] = config.get('runner_memory', '12GB')

config['custom_seed_corpus_dir'] = custom_seed_corpus_dir
if config['custom_seed_corpus_dir']:
validate_and_pack_custom_seed_corpus(config['custom_seed_corpus_dir'],
benchmarks)

return start_experiment_from_full_config(config)


Expand Down Expand Up @@ -330,6 +383,16 @@ def filter_file(tar_info):
for benchmark in config['benchmarks']:
add_oss_fuzz_corpus(benchmark, oss_fuzz_corpora_dir)

if config['custom_seed_corpus_dir']:
for benchmark in config['benchmarks']:
benchmark_corpus_archive_path = os.path.join(
config['custom_seed_corpus_dir'], f'{benchmark}.zip')
filestore_utils.cp(
benchmark_corpus_archive_path,
experiment_utils.get_custom_seed_corpora_filestore_path() + '/',
recursive=True,
parallel=True)


class BaseDispatcher:
"""Class representing the dispatcher."""
Expand Down Expand Up @@ -522,6 +585,10 @@ def main():
'--runners-cpus',
help='Cpus available to the runners.',
required=False)
parser.add_argument('-cs',
'--custom-seed-corpus-dir',
help='Path to the custom seed corpus',
required=False)

all_fuzzers = fuzzer_utils.get_fuzzer_names()
parser.add_argument('-f',
Expand Down Expand Up @@ -585,6 +652,14 @@ def main():
parser.error('The sum of runners and measurers cpus is greater than the'
' available cpu cores (%d)' % os.cpu_count())

if args.custom_seed_corpus_dir:
if args.no_seeds:
parser.error('Cannot enable options "custom_seed_corpus_dir" and '
'"no_seeds" at the same time')
if args.oss_fuzz_corpus:
parser.error('Cannot enable options "custom_seed_corpus_dir" and '
'"oss_fuzz_corpus" at the same time')

start_experiment(args.experiment_name,
args.experiment_config,
args.benchmarks,
Expand All @@ -596,7 +671,8 @@ def main():
allow_uncommitted_changes=args.allow_uncommitted_changes,
concurrent_builds=concurrent_builds,
measurers_cpus=measurers_cpus,
runners_cpus=runners_cpus)
runners_cpus=runners_cpus,
custom_seed_corpus_dir=args.custom_seed_corpus_dir)
return 0


Expand Down
32 changes: 31 additions & 1 deletion experiment/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,33 @@ def get_clusterfuzz_seed_corpus_path(fuzz_target_path):
return seed_corpus_path if os.path.exists(seed_corpus_path) else None


def _unpack_custom_seed_corpus(corpus_directory):
    """Replaces the contents of |corpus_directory| with the seed corpus
    provided by the user.

    The archive <BENCHMARK>.zip is read from the custom seed corpora filestore
    path. Every member that is not a directory and is not larger than
    CORPUS_ELEMENT_BYTES_LIMIT is written directly into |corpus_directory|
    under a zero-padded sequence number.

    Args:
        corpus_directory: Directory that receives the seed files. Any existing
            content is deleted first.
    """
    # Remove initial seed corpus. A missing directory is fine, it is
    # (re)created below either way.
    try:
        shutil.rmtree(corpus_directory)
    except FileNotFoundError:
        pass
    os.makedirs(corpus_directory, exist_ok=True)

    benchmark = environment.get('BENCHMARK')
    # NOTE(review): this path is opened directly by zipfile, so it presumably
    # has to be a local path - confirm for non-local filestores.
    corpus_archive_filename = posixpath.join(
        experiment_utils.get_custom_seed_corpora_filestore_path(),
        f'{benchmark}.zip')
    idx = 0
    with zipfile.ZipFile(corpus_archive_filename) as zip_file:
        for seed_corpus_file in zip_file.infolist():
            if seed_corpus_file.filename.endswith('/'):
                # Ignore directories.
                continue

            if seed_corpus_file.file_size > CORPUS_ELEMENT_BYTES_LIMIT:
                continue

            output_filename = '%016d' % idx
            output_file_path = os.path.join(corpus_directory, output_filename)
            # ZipFile.extract() treats its second argument as a directory and
            # would create <output_file_path>/<member path>. Copy the member's
            # bytes instead so that |output_file_path| is the seed file itself.
            with zip_file.open(seed_corpus_file) as member_file, \
                    open(output_file_path, 'wb') as output_file:
                shutil.copyfileobj(member_file, output_file)
            idx += 1

    logs.info('Unarchived %d files from custom seed corpus.', idx)


def _unpack_clusterfuzz_seed_corpus(fuzz_target_path, corpus_directory):
"""If a clusterfuzz seed corpus archive is available, unpack it into the
corpus directory if it exists. Copied from unpack_seed_corpus in
Expand Down Expand Up @@ -172,7 +199,10 @@ def run_fuzzer(max_total_time, log_filename):
logs.error('Fuzz target binary not found.')
return

_unpack_clusterfuzz_seed_corpus(target_binary, input_corpus)
if environment.get('CUSTOM_SEED_CORPUS'):
_unpack_custom_seed_corpus(input_corpus)
else:
_unpack_clusterfuzz_seed_corpus(target_binary, input_corpus)
_clean_seed_corpus(input_corpus)

if max_total_time is None:
Expand Down
1 change: 1 addition & 0 deletions experiment/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -717,6 +717,7 @@ def render_startup_script_template(instance_name: str, fuzzer: str,
'oss_fuzz_corpus': experiment_config['oss_fuzz_corpus'],
'num_cpu_cores': experiment_config['runner_num_cpu_cores'],
'cpuset': CPUSET,
'custom_seed_corpus_dir': experiment_config['custom_seed_corpus_dir'],
}

if not local_experiment:
Expand Down
1 change: 1 addition & 0 deletions experiment/test_data/experiment-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ git_hash: "git-hash"
no_seeds: false
no_dictionaries: false
oss_fuzz_corpus: false
custom_seed_corpus_dir: null
description: "Test experiment"
concurrent_builds: null
runners_cpus: null
Expand Down
1 change: 1 addition & 0 deletions experiment/test_run_experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,7 @@ def test_copy_resources_to_bucket(tmp_path):
'experiment': 'experiment',
'benchmarks': ['libxslt_xpath'],
'oss_fuzz_corpus': True,
'custom_seed_corpus_dir': None,
}
try:
with mock.patch('common.filestore_utils.cp') as mocked_filestore_cp:
Expand Down
1 change: 1 addition & 0 deletions experiment/test_scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ def test_create_trial_instance(benchmark, expected_image, expected_target,
-e NO_SEEDS=False \\
-e NO_DICTIONARIES=False \\
-e OSS_FUZZ_CORPUS=False \\
-e CUSTOM_SEED_CORPUS=None \\
-e DOCKER_REGISTRY=gcr.io/fuzzbench -e CLOUD_PROJECT=fuzzbench -e CLOUD_COMPUTE_ZONE=us-central1-a \\
-e EXPERIMENT_FILESTORE=gs://experiment-data \\
-e REPORT_FILESTORE=gs://web-reports \\
Expand Down