diff --git a/report/README.md b/report/README.md index 622b53bc9d..9a9da560e9 100644 --- a/report/README.md +++ b/report/README.md @@ -2,7 +2,7 @@ ## Experiment Report -* While the experiment is running, `upload_report.sh` periodically generates +* While the experiment is running, `upload_report.py` periodically generates an experiment report and uploads it to `gs://oss-fuzz-gcb-experiment-run-logs/Result-reports/`. * After the experiment a final report is generated and uploaded to GCS. diff --git a/report/docker_run.py b/report/docker_run.py index af0f7fd05f..78dcabddfa 100755 --- a/report/docker_run.py +++ b/report/docker_run.py @@ -209,7 +209,7 @@ def run_on_data_from_scratch(cmd=None): experiment_name = f"{date}-{args.frequency_label}-{args.benchmark_set}" # Report directory uses the same name as experiment. - # See upload_report.sh on how this is used. + # See upload_report.py on how this is used. gcs_report_dir = f"{args.sub_dir}/{experiment_name}" # Trends report use a similarly named path. @@ -219,7 +219,7 @@ def run_on_data_from_scratch(cmd=None): # Generate a report and upload it to GCS report_process = subprocess.Popen([ - "bash", "report/upload_report.sh", local_results_dir, gcs_report_dir, + "python_path", "report/upload_report.py", local_results_dir, gcs_report_dir, args.benchmark_set, args.model ]) @@ -363,7 +363,7 @@ def run_standard(cmd=None): experiment_name = f"{date}-{args.frequency_label}-{args.benchmark_set}" # Report directory uses the same name as experiment. - # See upload_report.sh on how this is used. + # See upload_report.py on how this is used. gcs_report_dir = f"{args.sub_dir}/{experiment_name}" # Trends report use a similarly named path. 
@@ -371,7 +371,7 @@ def run_standard(cmd=None): # Generate a report and upload it to GCS report_process = subprocess.Popen([ - "bash", "report/upload_report.sh", local_results_dir, gcs_report_dir, + "python_path", "report/upload_report.py", local_results_dir, gcs_report_dir, args.benchmark_set, args.model ]) diff --git a/report/upload_report.py b/report/upload_report.py new file mode 100644 index 0000000000..666a34cade --- /dev/null +++ b/report/upload_report.py @@ -0,0 +1,193 @@ +#!/usr/bin/env python3 +# Copyright 2025 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""Generate experiment reports and upload them to GCS.

While the experiment is running, the report is regenerated and uploaded
periodically; once the experiment has ended, a final report is generated and
uploaded.

Usage:
    python report/upload_report.py results_dir gcs_dir benchmark_set model
"""

import argparse
import logging
import os
import shutil
import subprocess
import sys
import time
from datetime import datetime
from pathlib import Path
from typing import Optional

logger = logging.getLogger(__name__)


class ReportUploader:
    """Builds the experiment report and copies it to the GCS bucket.

    Attributes:
        results_dir: Local directory holding the raw experiment results.
        gcs_dir: Directory name of this experiment below the bucket base path.
        benchmark_set: Benchmark set name passed to the report generator.
        model: LLM name passed to the report generator.
        results_report_dir: Local directory the report is generated into.
        bucket_base_path: GCS prefix under which all reports are stored.
        logger: Logger used by this uploader.
    """

    # The experiment is considered finished once this path exists.
    EXPERIMENT_ENDED_MARKER = Path('/experiment_ended')
    # Local directory (and remote sub-directory) holding the training data.
    TRAINING_DATA_DIR = 'training_data'

    def __init__(self, results_dir: str, gcs_dir: str, benchmark_set: str,
                 model: str):
        self.results_dir = Path(results_dir)
        self.gcs_dir = gcs_dir
        self.benchmark_set = benchmark_set
        self.model = model
        self.results_report_dir = Path('results-report')
        self.bucket_base_path = (
            'gs://oss-fuzz-gcb-experiment-run-logs/Result-reports')
        # Logging is configured by main(), not here, so that creating an
        # uploader never changes the process-wide logging setup.
        self.logger = logger

    @staticmethod
    def _python_executable() -> str:
        """Return the interpreter used to launch the helper modules.

        The ``PYTHON`` environment variable wins when it is set; otherwise
        the interpreter running this script is reused, so the helper modules
        run in the same environment.
        """
        return os.environ.get('PYTHON') or sys.executable or 'python'

    def _run_command(self, command: list, *, log_failure: bool = True) -> bool:
        """Run `command` and report whether it succeeded.

        Args:
            command: Program and arguments, executed without a shell.
            log_failure: When False, failures are not logged (used for
                best-effort commands that are expected to fail sometimes).

        Returns:
            True if the command exited with status 0. False if it exited with
            a non-zero status or could not be started at all.
        """
        try:
            subprocess.run(command, check=True, capture_output=True, text=True)
        except subprocess.CalledProcessError as e:
            if log_failure:
                logger.error('Command failed: %s',
                             ' '.join(map(str, command)))
                logger.error('Error: %s', e.stderr)
            return False
        except OSError as e:
            # Raised when the executable is missing or cannot be executed.
            # Treat it like any other failed command instead of crashing the
            # monitoring loop.
            if log_failure:
                logger.error('Command could not be started: %s',
                             ' '.join(map(str, command)))
                logger.error('Error: %s', e)
            return False
        return True

    def _generate_report(self) -> bool:
        """Run `report.web` to build the report in `results_report_dir`."""
        logger.info('Generating report...')
        return self._run_command([
            self._python_executable(), '-m', 'report.web',
            '-r', str(self.results_dir),
            '-b', self.benchmark_set,
            '-m', self.model,
            '-o', str(self.results_report_dir),
        ])

    def upload_files(self, source_path: str, destination_path: str,
                     content_type: Optional[str] = None) -> bool:
        """Recursively copy `source_path` to `destination_path` with gsutil.

        Args:
            source_path: Local file or directory to upload.
            destination_path: Destination ``gs://`` URL.
            content_type: Optional Content-Type header for the uploaded
                objects. When given, a one-hour public Cache-Control header
                is set as well.

        Returns:
            True if the gsutil command succeeded, False otherwise.
        """
        command = ['gsutil', '-q', '-m']
        if content_type:
            command += [
                '-h', f'Content-Type:{content_type}',
                '-h', 'Cache-Control:public, max-age=3600',
            ]
        command += ['cp', '-r', str(source_path), str(destination_path)]
        return self._run_command(command)

    def upload_report(self) -> bool:
        """Upload the generated report and the raw results to GCS.

        Returns:
            True if every upload succeeded; False as soon as one fails.
        """
        logger.info('Uploading report...')
        bucket_path = f'{self.bucket_base_path}/{self.gcs_dir}'

        # First pass: the whole report directory is uploaded as text/html.
        if not self.upload_files(f'{self.results_report_dir}/.', bucket_path,
                                 'text/html'):
            return False

        # Second pass: JSON files are uploaded again, one by one, so that
        # they carry the application/json content type.
        for json_file in sorted(self.results_report_dir.glob('**/*.json')):
            if not json_file.is_file():
                continue
            # Object names always use forward slashes.
            relative_path = json_file.relative_to(
                self.results_report_dir).as_posix()
            if not self.upload_files(str(json_file),
                                     f'{bucket_path}/{relative_path}',
                                     'application/json'):
                return False

        # Raw results go into the same GCS directory.
        if not self.upload_files(str(self.results_dir), bucket_path):
            return False

        logger.info(
            'See the published report at '
            'https://llm-exp.oss-fuzz.com/Result-reports/%s/', self.gcs_dir)
        return True

    def _generate_training_data(self) -> bool:
        """Regenerate the training data locally and upload it to GCS.

        Returns:
            True if all four generation runs and the upload succeeded.
        """
        logger.info('Generating and uploading training data...')
        training_dir = self.TRAINING_DATA_DIR
        remote_dir = f'{self.bucket_base_path}/{self.gcs_dir}/{training_dir}'

        # Start from a clean slate locally and remotely. The remote removal
        # is best effort, so its failure is neither logged nor fatal.
        shutil.rmtree(training_dir, ignore_errors=True)
        self._run_command(['gsutil', '-q', 'rm', '-r', remote_dir],
                          log_failure=False)

        # One run per flag combination, all saving into the same directory.
        flag_sets = (
            [],
            ['--group'],
            ['--coverage'],
            ['--coverage', '--group'],
        )
        for extra_flags in flag_sets:
            command = [
                self._python_executable(), '-m',
                'data_prep.parse_training_data',
                '--experiment-dir', str(self.results_dir),
                '--save-dir', training_dir,
                *extra_flags,
            ]
            if not self._run_command(command):
                return False

        return self.upload_files(training_dir, remote_dir)

    def update_report(self) -> bool:
        """Generate the report, upload it, then refresh the training data.

        The steps run in order and stop at the first failure.

        Returns:
            True if all three steps succeeded.
        """
        return (self._generate_report() and self.upload_report() and
                self._generate_training_data())

    def monitor_and_update(self, startup_delay: float = 300,
                           poll_interval: float = 600) -> bool:
        """Refresh the report until the experiment ends, then once more.

        Args:
            startup_delay: Seconds to wait before the first update, giving
                the experiment time to start.
            poll_interval: Seconds to wait between two updates.

        Returns:
            True if the final report was generated and uploaded successfully.
        """
        time.sleep(startup_delay)

        while not self.EXPERIMENT_ENDED_MARKER.exists():
            logger.info('Experiment is running... Updating report')
            if not self.update_report():
                # An intermediate failure is not fatal; the next round
                # retries from scratch.
                logger.warning('Report update failed; retrying in %s seconds.',
                               poll_interval)
            time.sleep(poll_interval)

        logger.info('Experiment finished. Uploading final report...')
        if not self.update_report():
            logger.error('Failed to upload the final report.')
            return False
        logger.info('Final report uploaded.')
        return True


def parse_args(argv: Optional[list] = None) -> argparse.Namespace:
    """Parse the command line.

    Args:
        argv: Argument list to parse; defaults to ``sys.argv[1:]``.

    Returns:
        Namespace with `results_dir`, `gcs_dir`, `benchmark_set` and `model`.
    """
    parser = argparse.ArgumentParser(
        description='Upload experiment reports to GCS')
    parser.add_argument('results_dir',
                        help='Local directory with experiment results')
    parser.add_argument('gcs_dir', help='GCS directory for the report')
    parser.add_argument('benchmark_set', help='Benchmark set being used')
    parser.add_argument('model', help='LLM model used')
    return parser.parse_args(argv)


def main() -> None:
    """Entry point: keep the published report updated until the run ends."""
    args = parse_args()
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s',
    )

    uploader = ReportUploader(
        args.results_dir,
        args.gcs_dir,
        args.benchmark_set,
        args.model,
    )
    # Create the directory the report is generated into.
    uploader.results_report_dir.mkdir(parents=True, exist_ok=True)
    uploader.monitor_and_update()


if __name__ == '__main__':
    main()
-## gcs_dir is the name of the directory for the report in gs://oss-fuzz-gcb-experiment-run-logs/Result-reports/. -## Defaults to '$(whoami)-%YY-%MM-%DD'. - -# TODO(dongge): Re-write this script in Python as it gets more complex. - -RESULTS_DIR=$1 -GCS_DIR=$2 -BENCHMARK_SET=$3 -MODEL=$4 -DATE=$(date '+%Y-%m-%d') - -# Sleep 5 minutes for the experiment to start. -sleep 300 - -if [[ $RESULTS_DIR = '' ]] -then - echo 'This script takes the results directory as the first argument' - exit 1 -fi - -if [[ $GCS_DIR = '' ]] -then - echo "This script needs to take gcloud Bucket directory as the second argument. Consider using $(whoami)-${DATE:?}." - exit 1 -fi - -# The LLM used to generate and fix fuzz targets. -if [[ $MODEL = '' ]] -then - echo "This script needs to take LLM as the third argument." - exit 1 -fi - -mkdir results-report - -update_report() { - # Generate the report - $PYTHON -m report.web -r "${RESULTS_DIR:?}" -b "${BENCHMARK_SET:?}" -m "$MODEL" -o results-report - - cd results-report || exit 1 - - # Upload the report to GCS. - echo "Uploading the report." - BUCKET_PATH="gs://oss-fuzz-gcb-experiment-run-logs/Result-reports/${GCS_DIR:?}" - # Upload HTMLs. - gsutil -q -m -h "Content-Type:text/html" \ - -h "Cache-Control:public, max-age=3600" \ - cp -r . "$BUCKET_PATH" - # Find all JSON files and upload them, removing the leading './' - find . -name '*json' | while read -r file; do - file_path="${file#./}" # Remove the leading "./". - gsutil -q -m -h "Content-Type:application/json" \ - -h "Cache-Control:public, max-age=3600" cp "$file" "$BUCKET_PATH/$file_path" - done - - cd .. - - # Upload the raw results into the same GCS directory. - echo "Uploading the raw results." - gsutil -q -m cp -r "${RESULTS_DIR:?}" \ - "gs://oss-fuzz-gcb-experiment-run-logs/Result-reports/${GCS_DIR:?}" - - echo "See the published report at https://llm-exp.oss-fuzz.com/Result-reports/${GCS_DIR:?}/" - - # Upload training data. - echo "Uploading training data." 
- rm -rf 'training_data' - gsutil -q rm -r "gs://oss-fuzz-gcb-experiment-run-logs/Result-reports/${GCS_DIR:?}/training_data" || true - - $PYTHON -m data_prep.parse_training_data \ - --experiment-dir "${RESULTS_DIR:?}" --save-dir 'training_data' - $PYTHON -m data_prep.parse_training_data --group \ - --experiment-dir "${RESULTS_DIR:?}" --save-dir 'training_data' - $PYTHON -m data_prep.parse_training_data --coverage \ - --experiment-dir "${RESULTS_DIR:?}" --save-dir 'training_data' - $PYTHON -m data_prep.parse_training_data --coverage --group \ - --experiment-dir "${RESULTS_DIR:?}" --save-dir 'training_data' - gsutil -q cp -r 'training_data' \ - "gs://oss-fuzz-gcb-experiment-run-logs/Result-reports/${GCS_DIR:?}" -} - -while [[ ! -f /experiment_ended ]]; do - update_report - echo "Experiment is running..." - sleep 600 -done - -echo "Experiment finished." -update_report -echo "Final report uploaded."