Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions common/experiment_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,11 @@ def get_custom_seed_corpora_filestore_path():
'custom_seed_corpora')


def get_random_corpora_filestore_path():
    """Returns the filestore path under which random corpora are stored.

    The path is the 'random_corpora' entry directly beneath the experiment
    filestore path.
    """
    experiment_filestore_path = get_experiment_filestore_path()
    return posixpath.join(experiment_filestore_path, 'random_corpora')


def get_dispatcher_instance_name(experiment: str) -> str:
    """Returns the name of the dispatcher instance for |experiment|.

    The name is the experiment name prefixed with 'd-'.
    """
    instance_name = 'd-%s' % experiment
    return instance_name
Expand Down
166 changes: 166 additions & 0 deletions common/random_corpus_fuzzing_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
import random
import os
import zipfile
import tempfile
import tarfile
import multiprocessing
import itertools
from typing import List

from common import experiment_utils
from common import filesystem
from experiment.measurer import coverage_utils
from experiment.measurer import run_coverage
from database import utils as db_utils
from database import models
from common import logs
from common import benchmark_utils
from experiment.build import build_utils
from common import experiment_path as exp_path

# Number of files sampled at random from a benchmark's custom seed corpus each
# time a corpus is drawn for a trial group.
MAX_RANDOM_CORPUS_FILES = 5


def get_covered_branches_per_function(coverage_info):
    """Returns the set of covered branch outcomes found in |coverage_info|.

    Args:
        coverage_info: Parsed coverage summary. Function entries are read
            from coverage_info['data'][0]['functions']; each entry provides
            a 'name' and a list of 'branches'.

    Returns:
        A set of strings of the form '<function> <b0>:<b1>-<b2>:<b3> <T|F>',
        where b0..b3 are the first four fields of a branch entry. A 'T' key
        is added when field 4 is truthy and an 'F' key when field 5 is
        truthy.
    """
    covered_branches = set()
    for function in coverage_info['data'][0]['functions']:
        function_name = function['name']
        for branch in function['branches']:
            # NOTE(review): presumably the llvm-cov JSON export layout, where
            # fields 0-3 delimit the source region and fields 4/5 hold the
            # true/false execution counts -- confirm against the exporter.
            location = '{} {}:{}-{}:{}'.format(function_name, branch[0],
                                               branch[1], branch[2],
                                               branch[3])
            if branch[4]:
                covered_branches.add(location + ' T')
            if branch[5]:
                covered_branches.add(location + ' F')
    return covered_branches


def get_covered_branches(coverage_binary, corpus_dir):
    """Returns the branches covered by |coverage_binary| on |corpus_dir|.

    A coverage run is performed on the corpus, its profdata is merged and
    summarized as JSON, and the covered branches are read from that summary.
    All intermediate files live in a temporary directory.
    """
    with tempfile.TemporaryDirectory() as work_dir:

        def in_work_dir(name):
            return os.path.join(work_dir, name)

        raw_profdata = in_work_dir('data.profdata')
        merged_profdata = in_work_dir('merged.profdata')
        summary_json = in_work_dir('merged.json')
        crashes_dir = in_work_dir('crashes')
        filesystem.create_directory(crashes_dir)

        run_coverage.do_coverage_run(coverage_binary, corpus_dir,
                                     raw_profdata, crashes_dir)
        coverage_utils.merge_profdata_files([raw_profdata], merged_profdata)
        coverage_utils.generate_json_summary(coverage_binary,
                                             merged_profdata,
                                             summary_json,
                                             summary_only=False)
        # The summary must be parsed while the temporary directory exists.
        coverage_info = coverage_utils.get_coverage_infomation(summary_json)
        return get_covered_branches_per_function(coverage_info)


def initialize_random_corpus_fuzzing(benchmarks: List[str],
                                     num_trials: int,
                                     target_fuzzing: bool = False):
    """Prepares random corpora for |benchmarks| and saves target coverage.

    Each benchmark is handled by prepare_benchmark_random_corpus in a worker
    process. The entities returned for all benchmarks are flattened into one
    list and saved to the database with a single bulk save.
    """
    starmap_args = [
        (benchmark, num_trials, target_fuzzing) for benchmark in benchmarks
    ]
    with multiprocessing.Pool() as pool:
        per_benchmark_targets = pool.starmap(prepare_benchmark_random_corpus,
                                             starmap_args)

    target_coverage = []
    for benchmark_targets in per_benchmark_targets:
        target_coverage.extend(benchmark_targets)

    logs.info('Done Preparing target fuzzing (total %d target)',
              len(target_coverage))
    db_utils.bulk_save(target_coverage)


def get_coverage_binary(benchmark, tmp_dir):
    """Copies the coverage build of |benchmark| into |tmp_dir| and unpacks it.

    Args:
        benchmark: Name of the benchmark whose coverage archive is fetched.
        tmp_dir: Directory that receives the archive and its contents.

    Returns:
        The path of the benchmark's fuzz target inside |tmp_dir|.
    """
    coverage_binaries_dir = build_utils.get_coverage_binaries_dir()
    archive_name = 'coverage-build-%s.tar.gz' % benchmark
    archive_filestore_path = exp_path.filestore(coverage_binaries_dir /
                                                archive_name)
    filesystem.copy(archive_filestore_path, tmp_dir)
    archive_path = os.path.join(tmp_dir, archive_name)
    # Use a context manager so the archive handle is always closed, also
    # when extraction fails, and before the archive file is removed.
    with tarfile.open(archive_path, 'r:gz') as tar:
        tar.extractall(tmp_dir)
    os.remove(archive_path)
    return os.path.join(tmp_dir, benchmark_utils.get_fuzz_target(benchmark))


def prepare_benchmark_random_corpus(benchmark: str,
                                    num_trials: int,
                                    target_fuzzing: bool = False):
    """Prepares a random seed corpus for each trial group of |benchmark|.

    For every trial group, files are sampled at random from the benchmark's
    custom seed corpus archive and copied to that group's directory under
    the random corpora filestore path. In target fuzzing mode a second
    random sample is drawn; the branches it covers that the seed sample does
    not cover become the targets of the trial group.

    Args:
        benchmark: Name of the benchmark.
        num_trials: Number of trial groups to prepare.
        target_fuzzing: Whether to also compute target branches.

    Returns:
        A list of models.TargetCoverage entities, one per target branch and
        trial group. The list is empty unless |target_fuzzing| is set.

    Raises:
        ValueError: If the seed corpus archive contains no files.
        RuntimeError: If no target branch is found for a trial group.
    """
    coverage_binary = None
    target_coverage = []
    # Path used to store and feed the seed corpus to the benchmark runner.
    # Each trial group will have the same seed input(s).
    benchmark_random_corpora = os.path.join(
        experiment_utils.get_random_corpora_filestore_path(), benchmark)
    filesystem.create_directory(benchmark_random_corpora)

    # Inputs are taken from the custom seed corpus directory.
    corpus_archive_filename = os.path.join(
        experiment_utils.get_custom_seed_corpora_filestore_path(),
        f'{benchmark}.zip')

    with tempfile.TemporaryDirectory() as tmp_dir:
        if target_fuzzing:
            coverage_binary = get_coverage_binary(benchmark, tmp_dir)

        with zipfile.ZipFile(corpus_archive_filename) as zip_file:
            # Only consider files, not directory entries.
            corpus_files = [
                f for f in zip_file.infolist() if not f.filename.endswith('/')
            ]
            for trial_group_num in range(num_trials):
                logs.info('Preparing random corpus: %s, trial_group: %d',
                          benchmark, trial_group_num)

                trial_group_subdir = 'trial-group-%d' % trial_group_num
                custom_corpus_trial_dir = os.path.join(
                    benchmark_random_corpora, trial_group_subdir)
                src_dir = os.path.join(tmp_dir, "source")
                filesystem.recreate_directory(src_dir)
                _extract_random_corpus_sample(zip_file, corpus_files, src_dir)

                if target_fuzzing:
                    dest_dir = os.path.join(tmp_dir, "dest")
                    filesystem.recreate_directory(dest_dir)
                    _extract_random_corpus_sample(zip_file, corpus_files,
                                                  dest_dir)

                    # Extract covered branches of source and destination
                    # inputs, then subtract to get the targeted branches.
                    src_branches = get_covered_branches(coverage_binary,
                                                        src_dir)
                    dest_branches = get_covered_branches(
                        coverage_binary, dest_dir)
                    target_branches = dest_branches - src_branches

                    if not target_branches:
                        raise RuntimeError(
                            'Unable to find target branches for %s.' %
                            benchmark)

                    for branch in target_branches:
                        target_cov = models.TargetCoverage()
                        target_cov.trial_group_num = trial_group_num
                        target_cov.benchmark = benchmark
                        target_cov.target_location = branch
                        target_coverage.append(target_cov)

                # Copy only the src directory.
                filesystem.copytree(src_dir, custom_corpus_trial_dir)

    return target_coverage


def _extract_random_corpus_sample(zip_file, corpus_files, output_dir):
    """Extracts a random sample of |corpus_files| into |output_dir|.

    At most MAX_RANDOM_CORPUS_FILES members are extracted. A smaller corpus
    is extracted entirely instead of making random.sample fail.
    """
    if not corpus_files:
        raise ValueError('Seed corpus archive %s contains no files.' %
                         zip_file.filename)
    sample_size = min(MAX_RANDOM_CORPUS_FILES, len(corpus_files))
    for member in random.sample(corpus_files, sample_size):
        zip_file.extract(member, output_dir)
13 changes: 13 additions & 0 deletions database/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ class Trial(Base):
benchmark = Column(String, nullable=False)
time_started = Column(DateTime(), nullable=True)
time_ended = Column(DateTime(), nullable=True)
trial_group_num = Column(Integer, nullable=True)

# Columns used for preemptible experiments.
preemptible = Column(Boolean, default=False, nullable=False)
Expand All @@ -71,6 +72,8 @@ class Snapshot(Base):
trial_id = Column(Integer, ForeignKey('trial.id'), primary_key=True)
trial = sqlalchemy.orm.relationship('Trial', back_populates='snapshots')
edges_covered = Column(Integer, nullable=False)
targets_covered = Column(Integer, nullable=False)
trial_group_num = Column(Integer, nullable=False)
fuzzer_stats = Column(JSON, nullable=True)
crashes = sqlalchemy.orm.relationship(
'Crash',
Expand All @@ -94,3 +97,13 @@ class Crash(Base):

__table_args__ = (ForeignKeyConstraint(
[time, trial_id], ['snapshot.time', 'snapshot.trial_id']),)


class TargetCoverage(Base):
    """Represents target branches for the target fuzzing mode.

    Each row stores one target branch of one trial group of a benchmark.
    """
    __tablename__ = 'target_coverage'

    # Primary key of the row.
    id = Column(Integer, primary_key=True)
    # Name of the benchmark the target branch belongs to.
    benchmark = Column(String, nullable=False)
    # Number of the trial group the target branch was selected for.
    trial_group_num = Column(Integer, nullable=False)
    # String identifying the targeted branch outcome.
    target_location = Column(String, nullable=False)
9 changes: 8 additions & 1 deletion experiment/dispatcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import time
from typing import List

from common import random_corpus_fuzzing_utils
from common import experiment_path as exp_path
from common import experiment_utils
from common import logs
Expand Down Expand Up @@ -131,7 +132,8 @@ def build_images_for_trials(fuzzers: List[str],
models.Trial(fuzzer=fuzzer,
experiment=experiment_name,
benchmark=benchmark,
preemptible=preemptible) for _ in range(num_trials)
preemptible=preemptible,
trial_group_num=trial) for trial in range(num_trials)
]
trials.extend(fuzzer_benchmark_trials)
return trials
Expand Down Expand Up @@ -159,6 +161,11 @@ def dispatcher_main():
experiment.config['concurrent_builds'])
_initialize_trials_in_db(trials)

if experiment.config['random_corpus'] or experiment.config['target_fuzzing']:
random_corpus_fuzzing_utils.initialize_random_corpus_fuzzing(
experiment.benchmarks, experiment.num_trials,
experiment.config['target_fuzzing'])

create_work_subdirs(['experiment-folders', 'measurement-folders'])

# Start measurer and scheduler in separate threads/processes.
Expand Down
4 changes: 3 additions & 1 deletion experiment/measurer/coverage_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,10 +233,12 @@ def get_coverage_infomation(coverage_summary_file):
class TrialCoverage: # pylint: disable=too-many-instance-attributes
"""Base class for storing and getting coverage data for a trial."""

def __init__(self, fuzzer: str, benchmark: str, trial_num: int):
def __init__(self, fuzzer: str, benchmark: str, trial_num: int,
trial_group_num: int):
self.fuzzer = fuzzer
self.benchmark = benchmark
self.trial_num = trial_num
self.trial_group_num = trial_group_num
self.benchmark_fuzzer_trial_dir = exp_utils.get_trial_dir(
fuzzer, benchmark, trial_num)
self.work_dir = exp_utils.get_work_dir()
Expand Down
Loading