Skip to content

@abohoss: Rewrite upload_report in python. #977

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion report/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

## Experiment Report

* While the experiment is running, `upload_report.sh` periodically generates
* While the experiment is running, `upload_report.py` periodically generates
an experiment report and uploads it to
`gs://oss-fuzz-gcb-experiment-run-logs/Result-reports/`.
* After the experiment a final report is generated and uploaded to GCS.
Expand Down
8 changes: 4 additions & 4 deletions report/docker_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ def run_on_data_from_scratch(cmd=None):
experiment_name = f"{date}-{args.frequency_label}-{args.benchmark_set}"

# Report directory uses the same name as experiment.
# See upload_report.sh on how this is used.
# See upload_report.py on how this is used.
gcs_report_dir = f"{args.sub_dir}/{experiment_name}"

# The trends report uses a similarly named path.
Expand All @@ -219,7 +219,7 @@ def run_on_data_from_scratch(cmd=None):

# Generate a report and upload it to GCS
report_process = subprocess.Popen([
"bash", "report/upload_report.sh", local_results_dir, gcs_report_dir,
"python_path", "report/upload_report.py", local_results_dir, gcs_report_dir,
args.benchmark_set, args.model
])

Expand Down Expand Up @@ -363,15 +363,15 @@ def run_standard(cmd=None):
experiment_name = f"{date}-{args.frequency_label}-{args.benchmark_set}"

# Report directory uses the same name as experiment.
# See upload_report.sh on how this is used.
# See upload_report.py on how this is used.
gcs_report_dir = f"{args.sub_dir}/{experiment_name}"

# The trends report uses a similarly named path.
gcs_trend_report_path = f"{args.sub_dir}/{experiment_name}.json"

# Generate a report and upload it to GCS
report_process = subprocess.Popen([
"bash", "report/upload_report.sh", local_results_dir, gcs_report_dir,
"python_path", "report/upload_report.py", local_results_dir, gcs_report_dir,
args.benchmark_set, args.model
])

Expand Down
193 changes: 193 additions & 0 deletions report/upload_report.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
#!/usr/bin/env python3
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an 'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import logging
import os
import shutil
import subprocess
import sys
import time
from datetime import datetime
from pathlib import Path
from typing import Optional

class ReportUploader:
    """Generate the experiment report and publish it to GCS.

    One instance is bound to a single experiment: a local results directory,
    the GCS sub-directory the report is published under, and the benchmark
    set / model names that are forwarded to the report generator.
    """

    # Seconds to wait before the first update so the experiment can start.
    INITIAL_DELAY_SECONDS = 300
    # Seconds to wait between two updates while the experiment is running.
    UPDATE_INTERVAL_SECONDS = 600
    # monitor_and_update() leaves its polling loop once this path exists.
    EXPERIMENT_ENDED_MARKER = Path('/experiment_ended')
    # Local scratch directory that receives the generated training data.
    TRAINING_DATA_DIR = 'training_data'

    def __init__(self, results_dir: str, gcs_dir: str, benchmark_set: str,
                 model: str):
        """Store the experiment parameters and set up logging.

        Args:
            results_dir: Local directory holding the experiment results.
            gcs_dir: Directory, relative to the report bucket, to upload to.
            benchmark_set: Benchmark set name passed to the report generator.
            model: Model name passed to the report generator.
        """
        self.results_dir = Path(results_dir)
        self.gcs_dir = gcs_dir
        self.benchmark_set = benchmark_set
        self.model = model
        self.results_report_dir = Path('results-report')
        self.bucket_base_path = (
            'gs://oss-fuzz-gcb-experiment-run-logs/Result-reports')

        # basicConfig() does nothing when the root logger already has
        # handlers, so creating several uploaders is harmless.
        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s',
        )
        self.logger = logging.getLogger(__name__)

    @staticmethod
    def _python() -> str:
        """Return the interpreter used to launch helper modules."""
        # Prefer the interpreter running this script so helper modules see
        # the same environment; sys.executable may be empty, in which case
        # fall back to whatever `python` is on PATH.
        return sys.executable or 'python'

    def _run_command(self, command: list) -> bool:
        """Run `command` and report whether it succeeded.

        Args:
            command: Program and arguments, one string per element.

        Returns:
            True when the process exits with status 0. False when it exits
            with a non-zero status or cannot be started at all; in both
            cases the failure is logged instead of raised.
        """
        try:
            subprocess.run(command, check=True, capture_output=True, text=True)
        except subprocess.CalledProcessError as e:
            self.logger.error('Command failed: %s', ' '.join(command))
            self.logger.error('Error: %s', e.stderr)
            return False
        except OSError as e:
            # Raised (e.g. FileNotFoundError) when the executable is missing
            # or not runnable. Treat it like any other failed command so the
            # monitoring loop keeps running.
            self.logger.error('Command could not be started: %s',
                              ' '.join(command))
            self.logger.error('Error: %s', e)
            return False
        return True

    def _generate_report(self) -> bool:
        """Run `report.web` to render the report into results_report_dir.

        Returns:
            True when the generator command succeeded, False otherwise.
        """
        self.logger.info('Generating report...')
        command = [
            self._python(), '-m', 'report.web',
            '-r', str(self.results_dir),
            '-b', self.benchmark_set,
            '-m', self.model,
            '-o', str(self.results_report_dir),
        ]
        return self._run_command(command)

    def upload_files(self, source_path: str, destination_path: str,
                     content_type: Optional[str] = None) -> bool:
        """Recursively copy `source_path` to `destination_path` with gsutil.

        Args:
            source_path: Local file or directory to copy.
            destination_path: Destination URL handed to `gsutil cp`.
            content_type: When given, sent as the Content-Type header
                together with a one hour public Cache-Control header.

        Returns:
            True when gsutil succeeded, False otherwise.
        """
        command = ['gsutil', '-q', '-m']

        if content_type:
            command.extend([
                '-h', f'Content-Type:{content_type}',
                '-h', 'Cache-Control:public, max-age=3600',
            ])

        command.extend(['cp', '-r', source_path, destination_path])
        return self._run_command(command)

    def upload_report(self) -> bool:
        """Upload the generated report and the raw results to GCS.

        Returns:
            True when every upload succeeded; False as soon as one fails.
        """
        self.logger.info('Uploading report...')
        bucket_path = f'{self.bucket_base_path}/{self.gcs_dir}'

        # Upload the whole report directory as HTML first.
        if not self.upload_files(f'{self.results_report_dir}/.', bucket_path,
                                 'text/html'):
            return False

        # The pass above tagged every file as text/html, so upload the JSON
        # files again with the matching content type.
        for json_file in self.results_report_dir.glob('**/*.json'):
            relative_path = json_file.relative_to(self.results_report_dir)
            # Object names always use '/', independent of the local OS.
            destination = f'{bucket_path}/{relative_path.as_posix()}'
            if not self.upload_files(str(json_file), destination,
                                     'application/json'):
                return False

        # Upload the raw results into the same GCS directory.
        if not self.upload_files(str(self.results_dir), bucket_path):
            return False

        self.logger.info(
            'See the published report at '
            'https://llm-exp.oss-fuzz.com/Result-reports/%s/', self.gcs_dir)
        return True

    def _generate_training_data(self) -> bool:
        """Regenerate the training data locally and upload it to GCS.

        Returns:
            True when all generator runs and the upload succeeded, False
            otherwise.
        """
        self.logger.info('Generating and uploading training data...')
        remote_training_data = (
            f'{self.bucket_base_path}/{self.gcs_dir}/{self.TRAINING_DATA_DIR}')

        # Start from a clean local directory; a missing one is not an error.
        shutil.rmtree(self.TRAINING_DATA_DIR, ignore_errors=True)

        # Best-effort removal of the previous upload: the exit status is
        # ignored and stderr is silenced because the remote path may not
        # exist yet.
        try:
            subprocess.run(
                ['gsutil', '-q', 'rm', '-r', remote_training_data],
                stderr=subprocess.DEVNULL,
                check=False,
            )
        except OSError as e:
            self.logger.warning(
                'Could not remove existing GCS training data: %s', e)

        # Generate every variant of the training data into the same folder.
        configurations = [
            [],
            ['--group'],
            ['--coverage'],
            ['--coverage', '--group'],
        ]

        for config in configurations:
            command = [
                self._python(), '-m', 'data_prep.parse_training_data',
                '--experiment-dir', str(self.results_dir),
                '--save-dir', self.TRAINING_DATA_DIR,
            ] + config

            if not self._run_command(command):
                return False

        return self.upload_files(self.TRAINING_DATA_DIR, remote_training_data)

    def update_report(self) -> bool:
        """Generate the report, upload it, then refresh the training data.

        Returns:
            True when all three steps succeeded. The steps run in order and
            the first failure skips the remaining ones.
        """
        return (self._generate_report() and self.upload_report() and
                self._generate_training_data())

    def monitor_and_update(self):
        """Refresh the report until the experiment ends, then once more.

        Waits INITIAL_DELAY_SECONDS, then updates the report every
        UPDATE_INTERVAL_SECONDS until EXPERIMENT_ENDED_MARKER exists, and
        finally performs one last update. Failed updates are logged and do
        not stop the loop.
        """
        time.sleep(self.INITIAL_DELAY_SECONDS)

        while not self.EXPERIMENT_ENDED_MARKER.exists():
            self.logger.info('Experiment is running... Updating report')
            if not self.update_report():
                self.logger.warning(
                    'Report update failed; retrying on the next cycle.')
            time.sleep(self.UPDATE_INTERVAL_SECONDS)

        self.logger.info('Experiment finished. Uploading final report...')
        if self.update_report():
            self.logger.info('Final report uploaded.')
        else:
            self.logger.error('Final report upload failed.')


def parse_args() -> argparse.Namespace:
    """Parse the four positional command-line arguments of this script.

    Returns:
        A namespace with the attributes `results_dir`, `gcs_dir`,
        `benchmark_set` and `model`, in the order they appear on the
        command line.
    """
    # (argument name, help text) pairs, in positional order.
    positional_arguments = (
        ('results_dir', 'Local directory with experiment results'),
        ('gcs_dir', 'GCS directory for the report'),
        ('benchmark_set', 'Benchmark set being used'),
        ('model', 'LLM model used'),
    )

    arg_parser = argparse.ArgumentParser(
        description='Upload experiment reports to GCS')
    for arg_name, arg_help in positional_arguments:
        arg_parser.add_argument(arg_name, help=arg_help)
    return arg_parser.parse_args()

def main():
    """Script entry point: parse arguments and run the report uploader.

    Creates the local `results-report` output directory if it is missing,
    then hands control to `ReportUploader.monitor_and_update()`.
    """
    cli_args = parse_args()

    # Make sure the report output directory exists before anything is
    # written into it.
    Path('results-report').mkdir(parents=True, exist_ok=True)

    ReportUploader(
        results_dir=cli_args.results_dir,
        gcs_dir=cli_args.gcs_dir,
        benchmark_set=cli_args.benchmark_set,
        model=cli_args.model,
    ).monitor_and_update()


# Only run when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()
110 changes: 0 additions & 110 deletions report/upload_report.sh

This file was deleted.

Loading