diff --git a/common/experiment_utils.py b/common/experiment_utils.py
index c533d12ec..2bbd64837 100644
--- a/common/experiment_utils.py
+++ b/common/experiment_utils.py
@@ -72,6 +72,12 @@ def get_oss_fuzz_corpora_filestore_path():
     return posixpath.join(get_experiment_filestore_path(), 'oss_fuzz_corpora')
 
 
+def get_custom_seed_corpora_filestore_path():
+    """Returns path containing the user-provided seed corpora."""
+    return posixpath.join(get_experiment_filestore_path(),
+                          'custom_seed_corpora')
+
+
 def get_dispatcher_instance_name(experiment: str) -> str:
     """Returns a dispatcher instance name for an experiment."""
     return 'd-%s' % experiment
diff --git a/experiment/resources/runner-startup-script-template.sh b/experiment/resources/runner-startup-script-template.sh
index ac43f655f..e0ff19f2d 100644
--- a/experiment/resources/runner-startup-script-template.sh
+++ b/experiment/resources/runner-startup-script-template.sh
@@ -46,6 +46,7 @@ docker run \
 -e NO_SEEDS={{no_seeds}} \
 -e NO_DICTIONARIES={{no_dictionaries}} \
 -e OSS_FUZZ_CORPUS={{oss_fuzz_corpus}} \
+-e CUSTOM_SEED_CORPUS_DIR={{custom_seed_corpus_dir}} \
 -e DOCKER_REGISTRY={{docker_registry}} {% if not local_experiment %}-e CLOUD_PROJECT={{cloud_project}} -e CLOUD_COMPUTE_ZONE={{cloud_compute_zone}} {% endif %}\
 -e EXPERIMENT_FILESTORE={{experiment_filestore}} {% if local_experiment %}-v {{experiment_filestore}}:{{experiment_filestore}} {% endif %}\
 -e REPORT_FILESTORE={{report_filestore}} {% if local_experiment %}-v {{report_filestore}}:{{report_filestore}} {% endif %}\
diff --git a/experiment/run_experiment.py b/experiment/run_experiment.py
index 6fa613069..42324c672 100644
--- a/experiment/run_experiment.py
+++ b/experiment/run_experiment.py
@@ -22,6 +22,7 @@
 import sys
 import tarfile
 import tempfile
+import zipfile
 from typing import Dict, List
 
 import jinja2
@@ -38,6 +39,7 @@
 from common import new_process
 from common import utils
 from common import yaml_utils
+from experiment import runner
 
 BENCHMARKS_DIR = os.path.join(utils.ROOT_DIR, 'benchmarks')
 FUZZERS_DIR = os.path.join(utils.ROOT_DIR, 'fuzzers')
@@ -148,6 +150,27 @@ def get_directories(parent_dir):
     ]
 
 
+def validate_custom_seed_corpus(custom_seed_corpus_dir, benchmarks):
+    """Validates the seed corpus provided by the user. Raises a
+    ValidationError unless |custom_seed_corpus_dir| is a directory containing
+    a non-empty directory for each benchmark in |benchmarks|."""
+    if not os.path.isdir(custom_seed_corpus_dir):
+        raise ValidationError('Corpus location "%s" is invalid.' %
+                              custom_seed_corpus_dir)
+
+    for benchmark in benchmarks:
+        benchmark_corpus_dir = os.path.join(custom_seed_corpus_dir, benchmark)
+        if not os.path.exists(benchmark_corpus_dir):
+            raise ValidationError('Custom seed corpus directory for '
+                                  'benchmark "%s" does not exist.' % benchmark)
+        if not os.path.isdir(benchmark_corpus_dir):
+            raise ValidationError('Seed corpus of benchmark "%s" must be '
+                                  'a directory.' % benchmark)
+        if not os.listdir(benchmark_corpus_dir):
+            raise ValidationError('Seed corpus of benchmark "%s" is empty.' %
+                                  benchmark)
+
+
 def validate_benchmarks(benchmarks: List[str]):
     """Parses and validates list of benchmarks."""
     benchmark_types = set()
@@ -219,7 +242,8 @@ def start_experiment(  # pylint: disable=too-many-arguments
         allow_uncommitted_changes=False,
         concurrent_builds=None,
         measurers_cpus=None,
-        runners_cpus=None):
+        runners_cpus=None,
+        custom_seed_corpus_dir=None):
     """Start a fuzzer benchmarking experiment."""
     if not allow_uncommitted_changes:
         check_no_uncommitted_changes()
@@ -248,6 +272,12 @@ def start_experiment(  # pylint: disable=too-many-arguments
     # 12GB is just the amount that KLEE needs, use this default to make KLEE
     # experiments easier to run.
     config['runner_memory'] = config.get('runner_memory', '12GB')
+
+    config['custom_seed_corpus_dir'] = custom_seed_corpus_dir
+    if config['custom_seed_corpus_dir']:
+        validate_custom_seed_corpus(config['custom_seed_corpus_dir'],
+                                    benchmarks)
+
     return start_experiment_from_full_config(config)
 
 
@@ -330,6 +360,16 @@ def filter_file(tar_info):
         for benchmark in config['benchmarks']:
             add_oss_fuzz_corpus(benchmark, oss_fuzz_corpora_dir)
 
+    if config['custom_seed_corpus_dir']:
+        for benchmark in config['benchmarks']:
+            benchmark_custom_corpus_dir = os.path.join(
+                config['custom_seed_corpus_dir'], benchmark)
+            filestore_utils.cp(
+                benchmark_custom_corpus_dir,
+                experiment_utils.get_custom_seed_corpora_filestore_path() + '/',
+                recursive=True,
+                parallel=True)
+
 
 class BaseDispatcher:
     """Class representing the dispatcher."""
@@ -522,6 +562,10 @@ def main():
                         '--runners-cpus',
                         help='Cpus available to the runners.',
                         required=False)
+    parser.add_argument('-cs',
+                        '--custom-seed-corpus-dir',
+                        help='Path to the custom seed corpus',
+                        required=False)
 
     all_fuzzers = fuzzer_utils.get_fuzzer_names()
     parser.add_argument('-f',
@@ -585,6 +629,14 @@ def main():
         parser.error('The sum of runners and measurers cpus is greater than the'
                      ' available cpu cores (%d)' % os.cpu_count())
 
+    if args.custom_seed_corpus_dir:
+        if args.no_seeds:
+            parser.error('Cannot enable options "custom_seed_corpus_dir" and '
+                         '"no_seeds" at the same time')
+        if args.oss_fuzz_corpus:
+            parser.error('Cannot enable options "custom_seed_corpus_dir" and '
+                         '"oss_fuzz_corpus" at the same time')
+
     start_experiment(args.experiment_name,
                      args.experiment_config,
                      args.benchmarks,
@@ -596,7 +648,8 @@ def main():
                      allow_uncommitted_changes=args.allow_uncommitted_changes,
                      concurrent_builds=concurrent_builds,
                      measurers_cpus=measurers_cpus,
-                     runners_cpus=runners_cpus)
+                     runners_cpus=runners_cpus,
+                     custom_seed_corpus_dir=args.custom_seed_corpus_dir)
     return 0
 
 
diff --git a/experiment/runner.py b/experiment/runner.py
index 94cc9305e..2c877347f 100644
--- a/experiment/runner.py
+++ b/experiment/runner.py
@@ -115,6 +115,22 @@ def get_clusterfuzz_seed_corpus_path(fuzz_target_path):
     return seed_corpus_path if os.path.exists(seed_corpus_path) else None
 
 
+def _copy_custom_seed_corpus(corpus_directory):
+    """Replaces |corpus_directory| with the custom seed corpus that the user
+    provided for the benchmark named by the BENCHMARK environment variable."""
+    # NOTE(review): the directory is removed first, presumably so that the
+    # recursive copy recreates it rather than nesting the corpus inside it.
+    shutil.rmtree(corpus_directory)
+    benchmark = environment.get('BENCHMARK')
+    benchmark_custom_corpus_dir = posixpath.join(
+        experiment_utils.get_custom_seed_corpora_filestore_path(), benchmark)
+    filestore_utils.cp(benchmark_custom_corpus_dir,
+                       corpus_directory,
+                       recursive=True)
+    num_files = sum(len(files) for _, _, files in os.walk(corpus_directory))
+    logs.info('Copied %d files from custom seed corpus.', num_files)
+
+
 def _unpack_clusterfuzz_seed_corpus(fuzz_target_path, corpus_directory):
     """If a clusterfuzz seed corpus archive is available, unpack it into the
     corpus directory if it exists. Copied from unpack_seed_corpus in
@@ -172,7 +188,10 @@ def run_fuzzer(max_total_time, log_filename):
         logs.error('Fuzz target binary not found.')
         return
 
-    _unpack_clusterfuzz_seed_corpus(target_binary, input_corpus)
+    if environment.get('CUSTOM_SEED_CORPUS_DIR'):
+        _copy_custom_seed_corpus(input_corpus)
+    else:
+        _unpack_clusterfuzz_seed_corpus(target_binary, input_corpus)
     _clean_seed_corpus(input_corpus)
 
     if max_total_time is None:
diff --git a/experiment/scheduler.py b/experiment/scheduler.py
index 0f8946001..effb9f319 100644
--- a/experiment/scheduler.py
+++ b/experiment/scheduler.py
@@ -717,6 +717,7 @@ def render_startup_script_template(instance_name: str, fuzzer: str,
         'oss_fuzz_corpus': experiment_config['oss_fuzz_corpus'],
         'num_cpu_cores': experiment_config['runner_num_cpu_cores'],
         'cpuset': CPUSET,
+        'custom_seed_corpus_dir': experiment_config['custom_seed_corpus_dir'],
     }
 
     if not local_experiment:
diff --git a/experiment/test_data/experiment-config.yaml b/experiment/test_data/experiment-config.yaml
index 67f556bea..26fe13273 100644
--- a/experiment/test_data/experiment-config.yaml
+++ b/experiment/test_data/experiment-config.yaml
@@ -31,6 +31,7 @@ git_hash: "git-hash"
 no_seeds: false
 no_dictionaries: false
 oss_fuzz_corpus: false
+custom_seed_corpus_dir: null
 description: "Test experiment"
 concurrent_builds: null
 runners_cpus: null
diff --git a/experiment/test_run_experiment.py b/experiment/test_run_experiment.py
index 47034fbf7..b12a33288 100644
--- a/experiment/test_run_experiment.py
+++ b/experiment/test_run_experiment.py
@@ -202,6 +202,7 @@ def test_copy_resources_to_bucket(tmp_path):
         'experiment': 'experiment',
         'benchmarks': ['libxslt_xpath'],
         'oss_fuzz_corpus': True,
+        'custom_seed_corpus_dir': None,
     }
     try:
         with mock.patch('common.filestore_utils.cp') as mocked_filestore_cp:
diff --git a/experiment/test_scheduler.py b/experiment/test_scheduler.py
index 02fbbef75..3684505fc 100644
--- a/experiment/test_scheduler.py
+++ b/experiment/test_scheduler.py
@@ -118,6 +118,7 @@ def test_create_trial_instance(benchmark, expected_image, expected_target,
 -e NO_SEEDS=False \\
 -e NO_DICTIONARIES=False \\
 -e OSS_FUZZ_CORPUS=False \\
+-e CUSTOM_SEED_CORPUS_DIR=None \\
 -e DOCKER_REGISTRY=gcr.io/fuzzbench -e CLOUD_PROJECT=fuzzbench -e CLOUD_COMPUTE_ZONE=us-central1-a \\
 -e EXPERIMENT_FILESTORE=gs://experiment-data \\
 -e REPORT_FILESTORE=gs://web-reports \\