Skip to content

Commit 853e3e3

Browse files
abohossDonggeLiu
andcommitted
Update report upload logic (#972)
As mentioned in #971, I created an `upload_report.py` file. This Python version offers several improvements over the bash script, including: 1. Better Structure: - Object-oriented design with a clear separation of concerns - Methods are modular and reusable 2. Improved Error Handling: - Comprehensive logging - Proper exception handling - Command execution status checking 3. Enhanced Features: - Argument parsing with helpful error messages - Better file path handling using Path objects 4. Better Maintainability: - Logical grouping of related functionality - Easier to extend and modify --------- Co-authored-by: Dongge Liu <[email protected]>
1 parent 3ee6cfa commit 853e3e3

File tree

4 files changed

+198
-115
lines changed

4 files changed

+198
-115
lines changed

report/README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
## Experiment Report
44

5-
* While the experiment is running, `upload_report.sh` periodically generates
5+
* While the experiment is running, `upload_report.py` periodically generates
66
an experiment report and uploads it to
77
`gs://oss-fuzz-gcb-experiment-run-logs/Result-reports/`.
88
* After the experiment a final report is generated and uploaded to GCS.

report/docker_run.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ def run_on_data_from_scratch(cmd=None):
209209
experiment_name = f"{date}-{args.frequency_label}-{args.benchmark_set}"
210210

211211
# Report directory uses the same name as experiment.
212-
# See upload_report.sh on how this is used.
212+
# See upload_report.py on how this is used.
213213
gcs_report_dir = f"{args.sub_dir}/{experiment_name}"
214214

215215
# Trends report use a similarly named path.
@@ -219,7 +219,7 @@ def run_on_data_from_scratch(cmd=None):
219219

220220
# Generate a report and upload it to GCS
221221
report_process = subprocess.Popen([
222-
"bash", "report/upload_report.sh", local_results_dir, gcs_report_dir,
222+
"python_path", "report/upload_report.py", local_results_dir, gcs_report_dir,
223223
args.benchmark_set, args.model
224224
])
225225

@@ -363,15 +363,15 @@ def run_standard(cmd=None):
363363
experiment_name = f"{date}-{args.frequency_label}-{args.benchmark_set}"
364364

365365
# Report directory uses the same name as experiment.
366-
# See upload_report.sh on how this is used.
366+
# See upload_report.py on how this is used.
367367
gcs_report_dir = f"{args.sub_dir}/{experiment_name}"
368368

369369
# Trends report use a similarly named path.
370370
gcs_trend_report_path = f"{args.sub_dir}/{experiment_name}.json"
371371

372372
# Generate a report and upload it to GCS
373373
report_process = subprocess.Popen([
374-
"bash", "report/upload_report.sh", local_results_dir, gcs_report_dir,
374+
"python_path", "report/upload_report.py", local_results_dir, gcs_report_dir,
375375
args.benchmark_set, args.model
376376
])
377377

report/upload_report.py

Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
#!/usr/bin/env python3
2+
# Copyright 2025 Google LLC
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the 'License');
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an 'AS IS' BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
import argparse
17+
import os
18+
import time
19+
import subprocess
20+
import logging
21+
from datetime import datetime
22+
from pathlib import Path
23+
from typing import Optional
24+
25+
class ReportUploader:
    """Generate experiment reports and upload them to GCS.

    The uploader shells out to `python -m report.web` to render the report,
    to `python -m data_prep.parse_training_data` to build training data, and
    to `gsutil` to copy everything under `bucket_base_path/gcs_dir`.
    """

    def __init__(self, results_dir: str, gcs_dir: str, benchmark_set: str,
                 model: str):
        """Store the experiment settings and configure logging.

        Args:
            results_dir: Local directory holding the raw experiment results.
            gcs_dir: Directory (relative to the bucket base path) to upload to.
            benchmark_set: Benchmark set name passed to the report generator.
            model: Model name passed to the report generator.
        """
        self.results_dir = Path(results_dir)
        self.gcs_dir = gcs_dir
        self.benchmark_set = benchmark_set
        self.model = model
        self.results_report_dir = Path('results-report')
        self.bucket_base_path = (
            'gs://oss-fuzz-gcb-experiment-run-logs/Result-reports')

        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s',
        )
        self.logger = logging.getLogger(__name__)

    def _run_command(self, command: list) -> bool:
        """Run `command` and report whether it succeeded.

        Failures are logged and never raised, so one broken command cannot
        terminate the long-running monitoring loop.

        Args:
            command: The program and its arguments.

        Returns:
            True if the command exited with status 0; False if it exited
            with a non-zero status or could not be started at all.
        """
        printable = ' '.join(map(str, command))
        try:
            subprocess.run(command, check=True, capture_output=True, text=True)
        except subprocess.CalledProcessError as e:
            self.logger.error('Command failed: %s', printable)
            self.logger.error('Error: %s', e.stderr)
            return False
        except OSError as e:
            # Raised when the executable is missing or not runnable
            # (e.g. FileNotFoundError, PermissionError).
            self.logger.error('Command failed: %s', printable)
            self.logger.error('Error: %s', e)
            return False
        return True

    def _generate_report(self) -> bool:
        """Render the report into `results_report_dir` via `report.web`.

        Returns:
            True if the report generator succeeded, False otherwise.
        """
        self.logger.info('Generating report...')
        command = [
            'python', '-m', 'report.web',
            '-r', str(self.results_dir),
            '-b', self.benchmark_set,
            '-m', self.model,
            '-o', str(self.results_report_dir),
        ]
        return self._run_command(command)

    def upload_files(self, source_path: str, destination_path: str,
                     content_type: Optional[str] = None) -> bool:
        """Recursively copy `source_path` to `destination_path` with gsutil.

        Args:
            source_path: Local file or directory to upload.
            destination_path: Destination `gs://` URL.
            content_type: If given, sent as the Content-Type header together
                with a one-hour public Cache-Control header.

        Returns:
            True if the copy succeeded, False otherwise.
        """
        command = ['gsutil', '-q', '-m']

        if content_type:
            command.extend([
                '-h', f'Content-Type:{content_type}',
                '-h', 'Cache-Control:public, max-age=3600',
            ])

        command.extend(['cp', '-r', source_path, destination_path])
        return self._run_command(command)

    def upload_report(self) -> bool:
        """Upload the generated report and the raw results to GCS.

        Returns:
            True if every upload succeeded; False as soon as one fails.
        """
        self.logger.info('Uploading report...')
        bucket_path = f'{self.bucket_base_path}/{self.gcs_dir}'

        # Upload the whole report directory as HTML first.
        if not self.upload_files(
                f'{self.results_report_dir}/.',
                bucket_path,
                'text/html',
        ):
            return False

        # Re-upload the JSON files so they carry the correct content type.
        for json_file in self.results_report_dir.glob('**/*.json'):
            relative_path = json_file.relative_to(self.results_report_dir)
            if not self.upload_files(
                    str(json_file),
                    f'{bucket_path}/{relative_path}',
                    'application/json',
            ):
                return False

        # Upload raw results.
        if not self.upload_files(str(self.results_dir), bucket_path):
            return False

        self.logger.info(
            f'See the published report at https://llm-exp.oss-fuzz.com/Result-reports/{self.gcs_dir}/'
        )
        return True

    def _generate_training_data(self) -> bool:
        """Regenerate the training data and upload it to GCS.

        Any previous local and remote training data is removed first
        (best-effort), then the parser is run once per configuration.

        Returns:
            True if every generation step and the upload succeeded.
        """
        # Local import: only this method needs shutil.
        import shutil

        self.logger.info('Generating and uploading training data...')
        local_dir = 'training_data'
        remote_dir = f'{self.bucket_base_path}/{self.gcs_dir}/training_data'

        # Remove existing training data; a missing directory is not an error.
        shutil.rmtree(local_dir, ignore_errors=True)

        # Remove existing GCS training data. This is best-effort: the remote
        # directory may not exist yet, so the exit status is ignored.
        try:
            subprocess.run(
                ['gsutil', '-q', 'rm', '-r', remote_dir],
                stderr=subprocess.DEVNULL,
                check=False,
            )
        except OSError as e:
            self.logger.warning(
                'Could not remove existing GCS training data: %s', e)

        # Generate different versions of training data.
        configurations = [
            [],
            ['--group'],
            ['--coverage'],
            ['--coverage', '--group'],
        ]

        for config in configurations:
            command = [
                'python', '-m', 'data_prep.parse_training_data',
                '--experiment-dir', str(self.results_dir),
                '--save-dir', local_dir,
            ] + config

            if not self._run_command(command):
                return False

        # Upload training data.
        return self.upload_files(local_dir, remote_dir)

    def update_report(self) -> bool:
        """Generate the report, upload it, then refresh the training data.

        Returns:
            True if all three steps succeeded; False at the first failure
            (later steps are skipped).
        """
        return (self._generate_report()
                and self.upload_report()
                and self._generate_training_data())

    def monitor_and_update(self, initial_delay: float = 300,
                           update_interval: float = 600,
                           end_marker: str = '/experiment_ended'):
        """Refresh the report periodically until the experiment ends.

        Args:
            initial_delay: Seconds to wait before the first update, giving
                the experiment time to start.
            update_interval: Seconds to wait between updates.
            end_marker: Path whose existence signals that the experiment
                has finished.
        """
        time.sleep(initial_delay)

        marker = Path(end_marker)
        while not marker.exists():
            self.logger.info('Experiment is running... Updating report')
            if not self.update_report():
                # Keep monitoring: a later cycle may succeed.
                self.logger.warning(
                    'Report update failed; will retry on the next cycle.')
            time.sleep(update_interval)

        self.logger.info('Experiment finished. Uploading final report...')
        if self.update_report():
            self.logger.info('Final report uploaded.')
        else:
            self.logger.error('Final report update failed.')
170+
171+
172+
def parse_args(argv: Optional[list] = None) -> argparse.Namespace:
    """Parse the command-line arguments of the report uploader.

    Args:
        argv: Argument strings to parse. Defaults to None, in which case
            argparse reads them from `sys.argv`.

    Returns:
        A namespace with `results_dir`, `gcs_dir`, `benchmark_set` and
        `model` attributes.
    """
    parser = argparse.ArgumentParser(
        description='Upload experiment reports to GCS')
    parser.add_argument(
        'results_dir', help='Local directory with experiment results')
    parser.add_argument('gcs_dir', help='GCS directory for the report')
    parser.add_argument('benchmark_set', help='Benchmark set being used')
    parser.add_argument('model', help='LLM model used')
    return parser.parse_args(argv)
179+
180+
def main():
    """Parse the arguments and run the report uploader until the end."""
    args = parse_args()

    uploader = ReportUploader(
        args.results_dir,
        args.gcs_dir,
        args.benchmark_set,
        args.model,
    )
    # Create the report output directory from the uploader's own setting
    # so the path is defined in exactly one place.
    uploader.results_report_dir.mkdir(parents=True, exist_ok=True)
    uploader.monitor_and_update()


if __name__ == '__main__':
    main()

report/upload_report.sh

Lines changed: 0 additions & 110 deletions
This file was deleted.

0 commit comments

Comments
 (0)