1
+ #!/usr/bin/env python3
2
+ # Copyright 2025 Google LLC
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the 'License');
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an 'AS IS' BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
import argparse
import logging
import os
import shutil
import subprocess
import time
from datetime import datetime
from pathlib import Path
from typing import Optional
24
+
25
class ReportUploader:
    """Generate experiment reports and training data and publish them to GCS.

    Artifacts are built by running ``python -m report.web`` and
    ``python -m data_prep.parse_training_data`` as subprocesses, and are
    copied with ``gsutil`` to ``<bucket_base_path>/<gcs_dir>``.
    """

    def __init__(self, results_dir: str, gcs_dir: str, benchmark_set: str,
                 model: str):
        """Store the experiment settings and configure logging.

        Args:
            results_dir: Local directory holding the raw experiment results.
            gcs_dir: Directory name under the bucket's report root that all
                uploads go to.
            benchmark_set: Benchmark set name, forwarded to the report
                generator as ``-b``.
            model: Model name, forwarded to the report generator as ``-m``.
        """
        self.results_dir = Path(results_dir)
        self.gcs_dir = gcs_dir
        self.benchmark_set = benchmark_set
        self.model = model
        # Local output directory of the report generator.
        self.results_report_dir = Path('results-report')
        self.bucket_base_path = (
            'gs://oss-fuzz-gcb-experiment-run-logs/Result-reports')

        logging.basicConfig(
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s',
        )
        self.logger = logging.getLogger(__name__)

    def _run_command(self, command: list) -> bool:
        """Run *command* and report whether it succeeded.

        The process output is captured instead of being echoed. On failure
        the command line and the reason are logged.

        Args:
            command: Program and arguments, as accepted by ``subprocess.run``.

        Returns:
            True if the process exited with status 0; False if it exited with
            a non-zero status or could not be started.
        """
        try:
            subprocess.run(command, check=True, capture_output=True, text=True)
        except subprocess.CalledProcessError as e:
            self.logger.error('Command failed: %s', ' '.join(map(str, command)))
            self.logger.error('Error: %s', e.stderr)
            return False
        except OSError as e:
            # Raised when the executable is missing or cannot be executed.
            # Report it like any other failure so the periodic monitoring
            # loop is not killed by an unhandled exception.
            self.logger.error('Command failed: %s', ' '.join(map(str, command)))
            self.logger.error('Error: %s', e)
            return False
        return True

    def _generate_report(self) -> bool:
        """Run ``report.web`` to build the report into the local report dir.

        Returns:
            True if the generator exited successfully, False otherwise.
        """
        self.logger.info('Generating report...')
        command = [
            'python', '-m', 'report.web',
            '-r', str(self.results_dir),
            '-b', self.benchmark_set,
            '-m', self.model,
            '-o', str(self.results_report_dir),
        ]
        return self._run_command(command)

    def upload_files(self, source_path: str, destination_path: str,
                     content_type: Optional[str] = None) -> bool:
        """Recursively copy *source_path* to *destination_path* with gsutil.

        Args:
            source_path: Local file or directory to copy.
            destination_path: Destination URL or path handed to ``gsutil cp``.
            content_type: If given, uploaded objects get this ``Content-Type``
                header together with a one-hour public ``Cache-Control``.

        Returns:
            True if the copy succeeded, False otherwise.
        """
        command = ['gsutil', '-q', '-m']

        if content_type:
            command.extend([
                '-h', f'Content-Type:{content_type}',
                '-h', 'Cache-Control:public, max-age=3600',
            ])

        command.extend(['cp', '-r', source_path, destination_path])
        return self._run_command(command)

    def upload_report(self) -> bool:
        """Upload the generated report and the raw results to GCS.

        Returns:
            True if every upload succeeded; False as soon as one fails.
        """
        self.logger.info('Uploading report...')
        bucket_path = f'{self.bucket_base_path}/{self.gcs_dir}'

        # Copy the whole report directory with an HTML content type first.
        if not self.upload_files(
                f'{self.results_report_dir}/.',
                bucket_path,
                'text/html'):
            return False

        # The bulk copy above labelled every file as HTML, so upload the
        # JSON files again, one by one, with the proper content type.
        for json_file in self.results_report_dir.glob('**/*.json'):
            relative_path = json_file.relative_to(self.results_report_dir)
            if not self.upload_files(
                    str(json_file),
                    # Object names use forward slashes on every platform.
                    f'{bucket_path}/{relative_path.as_posix()}',
                    'application/json'):
                return False

        # Upload raw results with gsutil's default content-type handling.
        if not self.upload_files(str(self.results_dir), bucket_path):
            return False

        self.logger.info(
            'See the published report at '
            'https://llm-exp.oss-fuzz.com/Result-reports/%s/', self.gcs_dir)
        return True

    def _generate_training_data(self) -> bool:
        """Regenerate the training data from the results and upload it.

        Previously generated data is removed locally and in the bucket, the
        parser is run once per flag combination into ``training_data``, and
        that directory is then uploaded.

        Returns:
            True if every generation step and the upload succeeded.
        """
        self.logger.info('Generating and uploading training data...')
        remote_dir = f'{self.bucket_base_path}/{self.gcs_dir}/training_data'

        # Remove existing local training data; a missing directory is fine.
        shutil.rmtree('training_data', ignore_errors=True)

        # Remove existing GCS training data. This is best-effort: the exit
        # status is ignored and stderr is silenced (presumably because the
        # remote directory does not exist on the first run).
        try:
            subprocess.run(
                ['gsutil', '-q', 'rm', '-r', remote_dir],
                stderr=subprocess.DEVNULL,
                check=False,
            )
        except OSError as e:
            # gsutil itself could not be started, so the upload below cannot
            # work either; report it instead of raising.
            self.logger.error('Error: %s', e)
            return False

        # Generate different versions of training data.
        configurations = [
            [],
            ['--group'],
            ['--coverage'],
            ['--coverage', '--group'],
        ]

        for config in configurations:
            command = [
                'python', '-m', 'data_prep.parse_training_data',
                '--experiment-dir', str(self.results_dir),
                '--save-dir', 'training_data',
            ] + config

            if not self._run_command(command):
                return False

        return self.upload_files('training_data', remote_dir)

    def update_report(self) -> bool:
        """Generate and upload the report, then the training data.

        Returns:
            True if all three steps succeeded; the first failing step stops
            the sequence and yields False.
        """
        return (self._generate_report()
                and self.upload_report()
                and self._generate_training_data())

    def monitor_and_update(self, initial_delay: float = 300,
                           poll_interval: float = 600,
                           end_marker: str = '/experiment_ended'):
        """Refresh the report periodically until the experiment has ended.

        Args:
            initial_delay: Seconds to wait before the first update, giving
                the experiment time to start.
            poll_interval: Seconds to wait between two updates.
            end_marker: Path whose existence signals that the experiment is
                over; one final update is made once it appears.
        """
        time.sleep(initial_delay)

        marker = Path(end_marker)
        while not marker.exists():
            self.logger.info('Experiment is running... Updating report')
            if not self.update_report():
                # A failed intermediate update is retried on the next cycle.
                self.logger.warning('Report update failed.')
            time.sleep(poll_interval)

        self.logger.info('Experiment finished. Uploading final report...')
        if self.update_report():
            self.logger.info('Final report uploaded.')
        else:
            self.logger.error('Final report upload failed.')
170
+
171
+
172
def parse_args() -> argparse.Namespace:
    """Parse the uploader's four positional command-line arguments."""
    # (name, help text) for each positional argument, in command-line order.
    positionals = (
        ('results_dir', 'Local directory with experiment results'),
        ('gcs_dir', 'GCS directory for the report'),
        ('benchmark_set', 'Benchmark set being used'),
        ('model', 'LLM model used'),
    )

    cli = argparse.ArgumentParser(description='Upload experiment reports to GCS')
    for arg_name, arg_help in positionals:
        cli.add_argument(arg_name, help=arg_help)
    return cli.parse_args()
179
+
180
def main():
    """Parse the command line, then keep the report updated until the end."""
    cli_args = parse_args()

    # Make sure the local report output directory exists before the first run.
    Path('results-report').mkdir(parents=True, exist_ok=True)

    report_uploader = ReportUploader(
        results_dir=cli_args.results_dir,
        gcs_dir=cli_args.gcs_dir,
        benchmark_set=cli_args.benchmark_set,
        model=cli_args.model,
    )
    report_uploader.monitor_and_update()
191
+
192
# Run the uploader only when this file is executed as a script, not when it
# is imported.
if __name__ == '__main__':
    main()
0 commit comments