diff --git a/common/experiment_utils.py b/common/experiment_utils.py index 2bbd64837..97adde82f 100644 --- a/common/experiment_utils.py +++ b/common/experiment_utils.py @@ -78,6 +78,11 @@ def get_custom_seed_corpora_filestore_path(): 'custom_seed_corpora') +def get_random_corpora_filestore_path(): + """Returns path containing seed corpora for the target fuzzing experiment.""" + return posixpath.join(get_experiment_filestore_path(), 'random_corpora') + + def get_dispatcher_instance_name(experiment: str) -> str: """Returns a dispatcher instance name for an experiment.""" return 'd-%s' % experiment diff --git a/common/random_corpus_fuzzing_utils.py b/common/random_corpus_fuzzing_utils.py new file mode 100644 index 000000000..444e0f323 --- /dev/null +++ b/common/random_corpus_fuzzing_utils.py @@ -0,0 +1,166 @@ +import random +import os +import zipfile +import tempfile +import tarfile +import multiprocessing +import itertools +from typing import List + +from common import experiment_utils +from common import filesystem +from experiment.measurer import coverage_utils +from experiment.measurer import run_coverage +from database import utils as db_utils +from database import models +from common import logs +from common import benchmark_utils +from experiment.build import build_utils +from common import experiment_path as exp_path + +MAX_RANDOM_CORPUS_FILES = 5 + + +def get_covered_branches_per_function(coverage_info): + function_coverage_info = coverage_info["data"][0]["functions"] + covered_branches = set([]) + for function in function_coverage_info: + function_name = function["name"] + for branch in function["branches"]: + if branch[4]: + coverage_key = "{} {}:{}-{}:{} T".format( + function_name, branch[0], branch[1], branch[2], branch[3]) + covered_branches.add(coverage_key) + if branch[5]: + coverage_key = "{} {}:{}-{}:{} F".format( + function_name, branch[0], branch[1], branch[2], branch[3]) + covered_branches.add(coverage_key) + return covered_branches + + +def 
get_covered_branches(coverage_binary, corpus_dir): + with tempfile.TemporaryDirectory() as tmp_dir: + profdata_file = os.path.join(tmp_dir, 'data.profdata') + merged_profdata_file = os.path.join(tmp_dir, 'merged.profdata') + merged_summary_json_file = os.path.join(tmp_dir, 'merged.json') + crashes_dir = os.path.join(tmp_dir, 'crashes') + filesystem.create_directory(crashes_dir) + + run_coverage.do_coverage_run(coverage_binary, corpus_dir, profdata_file, + crashes_dir) + coverage_utils.merge_profdata_files([profdata_file], + merged_profdata_file) + coverage_utils.generate_json_summary(coverage_binary, + merged_profdata_file, + merged_summary_json_file, + summary_only=False) + coverage_info = coverage_utils.get_coverage_infomation( + merged_summary_json_file) + return get_covered_branches_per_function(coverage_info) + + +def initialize_random_corpus_fuzzing(benchmarks: List[str], + num_trials: int, + target_fuzzing: bool = False): + """Get targeting coverage from the given corpus.""" + pool_args = () + with multiprocessing.Pool(*pool_args) as pool: + target_coverage_list = pool.starmap(prepare_benchmark_random_corpus, [ + (benchmark, num_trials, target_fuzzing) for benchmark in benchmarks + ]) + target_coverage = list(itertools.chain(*target_coverage_list)) + logs.info('Done Preparing target fuzzing (total %d target)', + len(target_coverage)) + db_utils.bulk_save(target_coverage) + + +def get_coverage_binary(benchmark, tmp_dir): + """Copy coverage binary to temp directory for temporary usage.""" + coverage_binaries_dir = build_utils.get_coverage_binaries_dir() + archive_name = 'coverage-build-%s.tar.gz' % benchmark + archive_filestore_path = exp_path.filestore(coverage_binaries_dir / + archive_name) + filesystem.copy(archive_filestore_path, tmp_dir) + archive_path = os.path.join(tmp_dir, archive_name) + tar = tarfile.open(archive_path, 'r:gz') + tar.extractall(tmp_dir) + os.remove(archive_path) + coverage_binary = os.path.join(tmp_dir, + 
benchmark_utils.get_fuzz_target(benchmark)) + return coverage_binary + + +def prepare_benchmark_random_corpus(benchmark: str, + num_trials: int, + target_fuzzing: bool = False): + """Prepare corpus for target fuzzing.""" + coverage_binary = None + target_coverage = [] + # path used to store and feed seed corpus for benchmark runner + # each trial group will have the same seed input(s) + benchmark_random_corpora = os.path.join( + experiment_utils.get_random_corpora_filestore_path(), benchmark) + filesystem.create_directory(benchmark_random_corpora) + + # get inputs from the custom seed corpus directory + corpus_archive_filename = os.path.join( + experiment_utils.get_custom_seed_corpora_filestore_path(), + f'{benchmark}.zip') + + with tempfile.TemporaryDirectory() as tmp_dir: + if target_fuzzing: + coverage_binary = get_coverage_binary(benchmark, tmp_dir) + + with zipfile.ZipFile(corpus_archive_filename) as zip_file: + # only consider file not directory + corpus_files = [ + f for f in zip_file.infolist() if not f.filename.endswith('/') + ] + for trial_group_num in range(num_trials): + logs.info('Preparing random corpus: %s, trial_group: %d', + benchmark, trial_group_num) + + trial_group_subdir = 'trial-group-%d' % trial_group_num + custom_corpus_trial_dir = os.path.join(benchmark_random_corpora, + trial_group_subdir) + src_dir = os.path.join(tmp_dir, "source") + filesystem.recreate_directory(src_dir) + + source_files = random.sample(corpus_files, + MAX_RANDOM_CORPUS_FILES) + for file in source_files: + zip_file.extract(file, src_dir) + + if target_fuzzing: + dest_dir = os.path.join(tmp_dir, "dest") + filesystem.recreate_directory(dest_dir) + + dest_files = random.sample(corpus_files, + MAX_RANDOM_CORPUS_FILES) + for file in dest_files: + zip_file.extract(file, dest_dir) + + # extract covered branches of source and destination inputs + # then subtract to get targeting branches + src_branches = get_covered_branches(coverage_binary, + src_dir) + dest_branches = 
get_covered_branches( + coverage_binary, dest_dir) + target_branches = dest_branches - src_branches + + if not target_branches: + raise RuntimeError( + 'Unable to find target branches for %s.' % + benchmark) + + for branch in target_branches: + target_cov = models.TargetCoverage() + target_cov.trial_group_num = int(trial_group_num) + target_cov.benchmark = benchmark + target_cov.target_location = branch + target_coverage.append(target_cov) + + # copy only the src directory + filesystem.copytree(src_dir, custom_corpus_trial_dir) + + return target_coverage diff --git a/database/models.py b/database/models.py index 7cf902397..848849df9 100644 --- a/database/models.py +++ b/database/models.py @@ -50,6 +50,7 @@ class Trial(Base): benchmark = Column(String, nullable=False) time_started = Column(DateTime(), nullable=True) time_ended = Column(DateTime(), nullable=True) + trial_group_num = Column(Integer, nullable=True) # Columns used for preemptible experiments. preemptible = Column(Boolean, default=False, nullable=False) @@ -71,6 +72,8 @@ class Snapshot(Base): trial_id = Column(Integer, ForeignKey('trial.id'), primary_key=True) trial = sqlalchemy.orm.relationship('Trial', back_populates='snapshots') edges_covered = Column(Integer, nullable=False) + targets_covered = Column(Integer, nullable=False) + trial_group_num = Column(Integer, nullable=False) fuzzer_stats = Column(JSON, nullable=True) crashes = sqlalchemy.orm.relationship( 'Crash', @@ -94,3 +97,13 @@ class Crash(Base): __table_args__ = (ForeignKeyConstraint( [time, trial_id], ['snapshot.time', 'snapshot.trial_id']),) + + +class TargetCoverage(Base): + """Represents target branches for the target fuzzing mode.""" + __tablename__ = 'target_coverage' + + id = Column(Integer, primary_key=True) + benchmark = Column(String, nullable=False) + trial_group_num = Column(Integer, nullable=False) + target_location = Column(String, nullable=False) diff --git a/experiment/dispatcher.py b/experiment/dispatcher.py index 
c14d50c16..71aae29b0 100755 --- a/experiment/dispatcher.py +++ b/experiment/dispatcher.py @@ -24,6 +24,7 @@ import time from typing import List +from common import random_corpus_fuzzing_utils from common import experiment_path as exp_path from common import experiment_utils from common import logs @@ -131,7 +132,8 @@ def build_images_for_trials(fuzzers: List[str], models.Trial(fuzzer=fuzzer, experiment=experiment_name, benchmark=benchmark, - preemptible=preemptible) for _ in range(num_trials) + preemptible=preemptible, + trial_group_num=trial) for trial in range(num_trials) ] trials.extend(fuzzer_benchmark_trials) return trials @@ -159,6 +161,11 @@ def dispatcher_main(): experiment.config['concurrent_builds']) _initialize_trials_in_db(trials) + if experiment.config['random_corpus'] or experiment.config['target_fuzzing']: + random_corpus_fuzzing_utils.initialize_random_corpus_fuzzing( + experiment.benchmarks, experiment.num_trials, + experiment.config['target_fuzzing']) + create_work_subdirs(['experiment-folders', 'measurement-folders']) # Start measurer and scheduler in seperate threads/processes. 
diff --git a/experiment/measurer/coverage_utils.py b/experiment/measurer/coverage_utils.py index 0122b8454..935e77402 100644 --- a/experiment/measurer/coverage_utils.py +++ b/experiment/measurer/coverage_utils.py @@ -233,10 +233,12 @@ def get_coverage_infomation(coverage_summary_file): class TrialCoverage: # pylint: disable=too-many-instance-attributes """Base class for storing and getting coverage data for a trial.""" - def __init__(self, fuzzer: str, benchmark: str, trial_num: int): + def __init__(self, fuzzer: str, benchmark: str, trial_num: int, + trial_group_num: int): self.fuzzer = fuzzer self.benchmark = benchmark self.trial_num = trial_num + self.trial_group_num = trial_group_num self.benchmark_fuzzer_trial_dir = exp_utils.get_trial_dir( fuzzer, benchmark, trial_num) self.work_dir = exp_utils.get_work_dir() diff --git a/experiment/measurer/measure_manager.py b/experiment/measurer/measure_manager.py index 07e48dda3..5779e376e 100644 --- a/experiment/measurer/measure_manager.py +++ b/experiment/measurer/measure_manager.py @@ -32,6 +32,7 @@ from sqlalchemy import func from sqlalchemy import orm +from common import random_corpus_fuzzing_utils from common import benchmark_config from common import experiment_utils from common import experiment_path as exp_path @@ -51,7 +52,8 @@ logger = logs.Logger('measurer') # pylint: disable=invalid-name SnapshotMeasureRequest = collections.namedtuple( - 'SnapshotMeasureRequest', ['fuzzer', 'benchmark', 'trial_id', 'cycle']) + 'SnapshotMeasureRequest', + ['fuzzer', 'benchmark', 'trial_id', 'cycle', 'trial_group_num']) NUM_RETRIES = 3 RETRY_DELAY = 3 @@ -76,7 +78,10 @@ def measure_main(experiment_config): max_total_time = experiment_config['max_total_time'] measurers_cpus = experiment_config['measurers_cpus'] runners_cpus = experiment_config['runners_cpus'] - measure_loop(experiment, max_total_time, measurers_cpus, runners_cpus) + trials = experiment_config['trials'] + target_fuzzing = experiment_config['target_fuzzing'] + 
measure_loop(experiment, trials, max_total_time, measurers_cpus, + runners_cpus, target_fuzzing) # Clean up resources. gc.collect() @@ -95,9 +100,11 @@ def _process_init(cores_queue): def measure_loop(experiment: str, + trials: int, max_total_time: int, measurers_cpus=None, - runners_cpus=None): + runners_cpus=None, + target_fuzzing=False): """Continuously measure trials for |experiment|.""" logger.info('Start measure_loop.') @@ -116,7 +123,7 @@ def measure_loop(experiment: str, with multiprocessing.Pool( *pool_args) as pool, multiprocessing.Manager() as manager: - set_up_coverage_binaries(pool, experiment) + set_up_coverage_binaries(pool, experiment, trials) # Using Multiprocessing.Queue will fail with a complaint about # inheriting queue. q = manager.Queue() # pytype: disable=attribute-error @@ -126,7 +133,8 @@ def measure_loop(experiment: str, # races. all_trials_ended = scheduler.all_trials_ended(experiment) - if not measure_all_trials(experiment, max_total_time, pool, q): + if not measure_all_trials(experiment, max_total_time, pool, q, + target_fuzzing): # We didn't measure any trials. if all_trials_ended: # There are no trials producing snapshots to measure. @@ -141,7 +149,11 @@ def measure_loop(experiment: str, logger.info('Finished measure loop.') -def measure_all_trials(experiment: str, max_total_time: int, pool, q) -> bool: # pylint: disable=invalid-name +def measure_all_trials(experiment: str, + max_total_time: int, + pool, + q, + target_fuzzing=False) -> bool: # pylint: disable=invalid-name """Get coverage data (with coverage runs) for all active trials. Note that this should not be called unless multiprocessing.set_start_method('spawn') was called first. 
Otherwise it will use fork which breaks logging.""" @@ -158,7 +170,7 @@ def measure_all_trials(experiment: str, max_total_time: int, pool, q) -> bool: return False measure_trial_coverage_args = [ - (unmeasured_snapshot, max_cycle, q) + (unmeasured_snapshot, max_cycle, q, target_fuzzing) for unmeasured_snapshot in unmeasured_snapshots ] @@ -253,13 +265,15 @@ def _get_unmeasured_first_snapshots( snapshot for their trial. The trials are trials in |experiment|.""" trials_without_snapshots = _query_unmeasured_trials(experiment) return [ - SnapshotMeasureRequest(trial.fuzzer, trial.benchmark, trial.id, 1) + SnapshotMeasureRequest(trial.fuzzer, trial.benchmark, trial.id, 1, + trial.trial_group_num) for trial in trials_without_snapshots ] SnapshotWithTime = collections.namedtuple( - 'SnapshotWithTime', ['fuzzer', 'benchmark', 'trial_id', 'time']) + 'SnapshotWithTime', + ['fuzzer', 'benchmark', 'trial_id', 'time', 'trial_group_num']) def _query_measured_latest_snapshots(experiment: str): @@ -270,7 +284,8 @@ def _query_measured_latest_snapshots(experiment: str): # The order of these columns must correspond to the fields in # SnapshotWithTime. 
columns = (models.Trial.fuzzer, models.Trial.benchmark,
-                models.Snapshot.trial_id, latest_time_column)
+                models.Snapshot.trial_id, latest_time_column,
+                models.Trial.trial_group_num)
     experiment_filter = models.Snapshot.trial.has(experiment=experiment)
     group_by_columns = (models.Snapshot.trial_id, models.Trial.benchmark,
                        models.Trial.fuzzer)
@@ -300,7 +315,8 @@ def _get_unmeasured_next_snapshots(
             snapshot_with_cycle = SnapshotMeasureRequest(snapshot.fuzzer,
                                                          snapshot.benchmark,
                                                          snapshot.trial_id,
-                                                         next_cycle)
+                                                         next_cycle,
+                                                         snapshot.trial_group_num)
             next_snapshots.append(snapshot_with_cycle)
     return next_snapshots
@@ -357,8 +373,8 @@ class SnapshotMeasurer(coverage_utils.TrialCoverage):  # pylint: disable=too-man
     UNIT_BLACKLIST = collections.defaultdict(set)
 
     def __init__(self, fuzzer: str, benchmark: str, trial_num: int,
-                 trial_logger: logs.Logger):
-        super().__init__(fuzzer, benchmark, trial_num)
+                 trial_logger: logs.Logger, trial_group_num: int):
+        super().__init__(fuzzer, benchmark, trial_num, trial_group_num)
         self.logger = trial_logger
 
         self.corpus_dir = os.path.join(self.measurement_dir, 'corpus')
@@ -428,6 +444,31 @@ def generate_summary(self, cycle: int, summary_only=False):
             self.logger.error(
                 'Coverage summary json file generation failed in the end.')
 
+    def get_current_target_coverage(self) -> int:
+        """Get the current number of target branches covered."""
+        if not os.path.exists(self.cov_summary_file):
+            self.logger.warning('No coverage summary json file found.')
+            return 0
+        try:
+            total_target_covered = 0
+            coverage_info = coverage_utils.get_coverage_infomation(
+                self.cov_summary_file)
+            covered_branches = random_corpus_fuzzing_utils.get_covered_branches_per_function(
+                coverage_info)
+            # measure target coverage
+            with db_utils.session_scope() as session:
+                target_branches = session.query(models.TargetCoverage).filter(
+                    models.TargetCoverage.trial_group_num ==
+                    self.trial_group_num).all()
+                for target_branch in target_branches:
+                    if target_branch.target_location
in covered_branches: + total_target_covered += 1 + return total_target_covered + except Exception: # pylint: disable=broad-except + self.logger.error( + 'Coverage summary json file defective or missing.') + return 0 + def get_current_coverage(self) -> int: """Get the current number of lines covered.""" if not os.path.exists(self.cov_summary_file): @@ -612,8 +653,8 @@ def get_fuzzer_stats(stats_filestore_path): def measure_trial_coverage( # pylint: disable=invalid-name - measure_req, max_cycle: int, - q: multiprocessing.Queue) -> models.Snapshot: + measure_req, max_cycle: int, q: multiprocessing.Queue, + target_fuzzing) -> models.Snapshot: """Measure the coverage obtained by |trial_num| on |benchmark| using |fuzzer|.""" initialize_logs() @@ -624,24 +665,28 @@ def measure_trial_coverage( # pylint: disable=invalid-name try: snapshot = measure_snapshot_coverage(measure_req.fuzzer, measure_req.benchmark, - measure_req.trial_id, cycle) + measure_req.trial_id, cycle, + measure_req.trial_group_num, + target_fuzzing) if not snapshot: break q.put(snapshot) except Exception: # pylint: disable=broad-except - logger.error('Error measuring cycle.', - extras={ - 'fuzzer': measure_req.fuzzer, - 'benchmark': measure_req.benchmark, - 'trial_id': str(measure_req.trial_id), - 'cycle': str(cycle), - }) + logger.error( + 'Error measuring cycle.', + extras={ + 'fuzzer': measure_req.fuzzer, + 'benchmark': measure_req.benchmark, + 'trial_id': str(measure_req.trial_id), + 'trial_group_num': str(measure_req.trial_group_num), + 'cycle': str(cycle), + }) logger.debug('Done measuring trial: %d.', measure_req.trial_id) def measure_snapshot_coverage( # pylint: disable=too-many-locals - fuzzer: str, benchmark: str, trial_num: int, - cycle: int) -> models.Snapshot: + fuzzer: str, benchmark: str, trial_num: int, cycle: int, + trial_group_num: int, target_fuzzing: bool) -> models.Snapshot: """Measure coverage of the snapshot for |cycle| for |trial_num| of |fuzzer| and |benchmark|.""" snapshot_logger 
= logs.Logger('measurer', @@ -650,9 +695,10 @@ def measure_snapshot_coverage( # pylint: disable=too-many-locals 'benchmark': benchmark, 'trial_id': str(trial_num), 'cycle': str(cycle), + 'trial_group_num': str(trial_group_num) }) snapshot_measurer = SnapshotMeasurer(fuzzer, benchmark, trial_num, - snapshot_logger) + snapshot_logger, trial_group_num) measuring_start_time = time.time() snapshot_logger.info('Measuring cycle: %d.', cycle) @@ -660,9 +706,14 @@ def measure_snapshot_coverage( # pylint: disable=too-many-locals if snapshot_measurer.is_cycle_unchanged(cycle): snapshot_logger.info('Cycle: %d is unchanged.', cycle) regions_covered = snapshot_measurer.get_current_coverage() + targets_covered = 0 + if target_fuzzing: + targets_covered = snapshot_measurer.get_current_target_coverage() fuzzer_stats_data = snapshot_measurer.get_fuzzer_stats(cycle) return models.Snapshot(time=this_time, trial_id=trial_num, + trial_group_num=trial_group_num, + targets_covered=targets_covered, edges_covered=regions_covered, fuzzer_stats=fuzzer_stats_data, crashes=[]) @@ -698,8 +749,13 @@ def measure_snapshot_coverage( # pylint: disable=too-many-locals # Get the coverage of the new corpus units. regions_covered = snapshot_measurer.get_current_coverage() + targets_covered = 0 + if target_fuzzing: + targets_covered = snapshot_measurer.get_current_target_coverage() fuzzer_stats_data = snapshot_measurer.get_fuzzer_stats(cycle) snapshot = models.Snapshot(time=this_time, + trial_group_num=trial_group_num, + targets_covered=targets_covered, trial_id=trial_num, edges_covered=regions_covered, fuzzer_stats=fuzzer_stats_data, @@ -714,7 +770,7 @@ def measure_snapshot_coverage( # pylint: disable=too-many-locals return snapshot -def set_up_coverage_binaries(pool, experiment): +def set_up_coverage_binaries(pool, experiment, trials): """Set up coverage binaries for all benchmarks in |experiment|.""" # Use set comprehension to select distinct benchmarks. 
with db_utils.session_scope() as session: diff --git a/experiment/resources/runner-startup-script-template.sh b/experiment/resources/runner-startup-script-template.sh index e0ff19f2d..aed81d188 100644 --- a/experiment/resources/runner-startup-script-template.sh +++ b/experiment/resources/runner-startup-script-template.sh @@ -42,8 +42,11 @@ docker run \ -e BENCHMARK={{benchmark}} \ -e EXPERIMENT={{experiment}} \ -e TRIAL_ID={{trial_id}} \ +-e TRIAL_GROUP_NUM={{trial_group_num}} \ -e MAX_TOTAL_TIME={{max_total_time}} \ -e NO_SEEDS={{no_seeds}} \ +-e RANDOM_CORPUS={{random_corpus}} \ +-e TARGET_FUZZING={{target_fuzzing}} \ -e NO_DICTIONARIES={{no_dictionaries}} \ -e OSS_FUZZ_CORPUS={{oss_fuzz_corpus}} \ -e CUSTOM_SEED_CORPUS_DIR={{custom_seed_corpus_dir}} \ diff --git a/experiment/run_experiment.py b/experiment/run_experiment.py index 04d85a656..19927fe53 100644 --- a/experiment/run_experiment.py +++ b/experiment/run_experiment.py @@ -261,7 +261,9 @@ def start_experiment( # pylint: disable=too-many-arguments concurrent_builds=None, measurers_cpus=None, runners_cpus=None, - custom_seed_corpus_dir=None): + custom_seed_corpus_dir=None, + random_corpus=None, + target_fuzzing=False): """Start a fuzzer benchmarking experiment.""" if not allow_uncommitted_changes: check_no_uncommitted_changes() @@ -295,6 +297,8 @@ def start_experiment( # pylint: disable=too-many-arguments if config['custom_seed_corpus_dir']: validate_and_pack_custom_seed_corpus(config['custom_seed_corpus_dir'], benchmarks) + config['random_corpus'] = random_corpus + config['target_fuzzing'] = target_fuzzing return start_experiment_from_full_config(config) @@ -611,6 +615,18 @@ def main(): required=False, default=False, action='store_true') + parser.add_argument('-rs', + '--random-corpus', + help='Randomly pick seed corpus.', + required=False, + default=False, + action='store_true') + parser.add_argument('-tf', + '--target-fuzzing', + help='Target fuzzing mode.', + required=False, + default=False, + 
action='store_true') parser.add_argument( '-o', '--oss-fuzz-corpus', @@ -655,6 +671,13 @@ def main(): parser.error('Cannot enable options "custom_seed_corpus_dir" and ' '"oss_fuzz_corpus" at the same time') + if args.target_fuzzing and not args.custom_seed_corpus_dir: + parser.error('Target fuzzing can only be run with custom seed corpus') + + if args.random_corpus and not args.custom_seed_corpus_dir: + parser.error( + 'Random corpus option can only be run with custom seed corpus') + start_experiment(args.experiment_name, args.experiment_config, args.benchmarks, @@ -667,7 +690,9 @@ def main(): concurrent_builds=concurrent_builds, measurers_cpus=measurers_cpus, runners_cpus=runners_cpus, - custom_seed_corpus_dir=args.custom_seed_corpus_dir) + custom_seed_corpus_dir=args.custom_seed_corpus_dir, + random_corpus=args.random_corpus, + target_fuzzing=args.target_fuzzing) return 0 diff --git a/experiment/runner.py b/experiment/runner.py index 6dcb2c49c..ba99787ea 100644 --- a/experiment/runner.py +++ b/experiment/runner.py @@ -115,6 +115,19 @@ def get_clusterfuzz_seed_corpus_path(fuzz_target_path): return seed_corpus_path if os.path.exists(seed_corpus_path) else None +def _unpack_random_corpus(corpus_directory): + # remove initial seed corpus + shutil.rmtree(corpus_directory) + + benchmark = environment.get('BENCHMARK') + trial_group_num = environment.get('TRIAL_GROUP_NUM') + random_corpora_dir = experiment_utils.get_random_corpora_filestore_path() + random_corpora_sub_dir = 'trial-group-%s' % int(trial_group_num) + random_corpus_dir = posixpath.join(random_corpora_dir, benchmark, + random_corpora_sub_dir) + shutil.copytree(random_corpus_dir, corpus_directory) + + def _unpack_custom_seed_corpus(corpus_directory): "Unpack seed corpus provided by user" # remove initial seed corpus @@ -200,7 +213,11 @@ def run_fuzzer(max_total_time, log_filename): return if environment.get('CUSTOM_SEED_CORPUS_DIR'): - _unpack_custom_seed_corpus(input_corpus) + if 
environment.get('RANDOM_CORPUS') or environment.get( + 'TARGET_FUZZING'): + _unpack_random_corpus(input_corpus) + else: + _unpack_custom_seed_corpus(input_corpus) else: _unpack_clusterfuzz_seed_corpus(target_binary, input_corpus) _clean_seed_corpus(input_corpus) diff --git a/experiment/scheduler.py b/experiment/scheduler.py index effb9f319..e04e368cb 100644 --- a/experiment/scheduler.py +++ b/experiment/scheduler.py @@ -651,6 +651,7 @@ def __init__(self, trial): self.time_started = trial.time_started self.time_ended = trial.time_ended self.preemptible = trial.preemptible + self.trial_group_num = trial.trial_group_num def _initialize_logs(experiment): @@ -678,7 +679,8 @@ def _start_trial(trial: TrialProxy, experiment_config: dict): _initialize_logs(experiment_config['experiment']) logger.info('Start trial %d.', trial.id) started = create_trial_instance(trial.fuzzer, trial.benchmark, trial.id, - experiment_config, trial.preemptible) + trial.trial_group_num, experiment_config, + trial.preemptible) if started: trial.time_started = datetime_now() return trial @@ -688,6 +690,7 @@ def _start_trial(trial: TrialProxy, experiment_config: dict): def render_startup_script_template(instance_name: str, fuzzer: str, benchmark: str, trial_id: int, + trial_group_num: int, experiment_config: dict): """Render the startup script using the template and the parameters provided and return the result.""" @@ -705,6 +708,7 @@ def render_startup_script_template(instance_name: str, fuzzer: str, 'experiment': experiment, 'fuzzer': fuzzer, 'trial_id': trial_id, + 'trial_group_num': trial_group_num, 'max_total_time': experiment_config['max_total_time'], 'experiment_filestore': experiment_config['experiment_filestore'], 'report_filestore': experiment_config['report_filestore'], @@ -713,6 +717,8 @@ def render_startup_script_template(instance_name: str, fuzzer: str, 'docker_registry': experiment_config['docker_registry'], 'local_experiment': local_experiment, 'no_seeds': 
experiment_config['no_seeds'], + 'random_corpus': experiment_config['random_corpus'], + 'target_fuzzing': experiment_config['target_fuzzing'], 'no_dictionaries': experiment_config['no_dictionaries'], 'oss_fuzz_corpus': experiment_config['oss_fuzz_corpus'], 'num_cpu_cores': experiment_config['runner_num_cpu_cores'], @@ -728,13 +734,15 @@ def render_startup_script_template(instance_name: str, fuzzer: str, def create_trial_instance(fuzzer: str, benchmark: str, trial_id: int, - experiment_config: dict, preemptible: bool) -> bool: + trial_group_num: int, experiment_config: dict, + preemptible: bool) -> bool: """Create or start a trial instance for a specific trial_id,fuzzer,benchmark.""" instance_name = experiment_utils.get_trial_instance_name( experiment_config['experiment'], trial_id) startup_script = render_startup_script_template(instance_name, fuzzer, benchmark, trial_id, + trial_group_num, experiment_config) startup_script_path = '/tmp/%s-start-docker.sh' % instance_name with open(startup_script_path, 'w') as file_handle: