from __future__ import annotations

+import copy

from typing import Any, Literal, Type

import pandas as pd
-from tabrepo.benchmark.task.openml import OpenMLTaskWrapper, OpenMLS3TaskWrapper

+from tabrepo.benchmark.result import AGBagResult, BaselineResult, ConfigResult
+from tabrepo.benchmark.task.openml import OpenMLTaskWrapper, OpenMLS3TaskWrapper
from tabrepo.repository.repo_utils import convert_time_infer_s_from_batch_to_sample as _convert_time_infer_s_from_batch_to_sample
from tabrepo.utils.cache import AbstractCacheFunction, CacheFunctionPickle, CacheFunctionDummy
from tabrepo import EvaluationRepository
@@ -212,31 +214,151 @@ def generate_repo_from_experiments(
        return repo

-    def repo_from_results(
-        self,
-        results_lst: list[dict[str, Any]],
-        convert_time_infer_s_from_batch_to_sample: bool = True,  # FIXME: Remove this, it should be False eventually
-    ) -> EvaluationRepository:
-        configs_hyperparameters = self.get_configs_hyperparameters(results_lst=results_lst)
-
-        results_baselines = [result["df_results"] for result in results_lst if result["simulation_artifacts"] is None]
-        df_baselines = pd.concat(results_baselines, ignore_index=True) if results_baselines else None
-
-        results_configs = [result for result in results_lst if result["simulation_artifacts"] is not None]
-
-        results_lst_simulation_artifacts = [result["simulation_artifacts"] for result in results_configs]
-        results_lst_df = [result["df_results"] for result in results_configs]
-
-        if results_lst_df:
-            df_configs = pd.concat(results_lst_df, ignore_index=True)
-            if convert_time_infer_s_from_batch_to_sample:
-                df_configs = _convert_time_infer_s_from_batch_to_sample(df=df_configs, task_metadata=self.task_metadata)
-        else:
-            df_configs = None
-
-        if df_baselines is not None:
-            if convert_time_infer_s_from_batch_to_sample:
-                df_baselines = _convert_time_infer_s_from_batch_to_sample(df=df_baselines, task_metadata=self.task_metadata)
+    # TODO: Maybe calibrating model binary pred proba will improve ensemble roc_auc?
+    def temp_scale(self, y_val, y_pred_proba_val, method: str = "v2"):
+        init_val = 1.0
+        max_iter = 200
+        lr = 0.1
+        from tabrepo.utils.temp_scaling.calibrators import AutoGluonTemperatureScalingCalibrator, TemperatureScalingCalibrator, AutoGluonTemperatureScalingCalibratorFixed, TemperatureScalingCalibratorFixed
+        if method == "v1":
+            calibrator = AutoGluonTemperatureScalingCalibrator(init_val=init_val, max_iter=max_iter, lr=lr)
+        elif method == "v2":
+            calibrator = TemperatureScalingCalibrator(max_iter=max_iter, lr=lr)
+        elif method == "v1_fix":
+            calibrator = AutoGluonTemperatureScalingCalibratorFixed(init_val=init_val, max_iter=max_iter, lr=lr)
+        elif method == "v2_fix":
+            calibrator = TemperatureScalingCalibratorFixed(max_iter=max_iter, lr=lr)
+        else:
+            raise ValueError(f"Unknown temp_scale method: {method}")
+        calibrator.fit(X=y_pred_proba_val, y=y_val)
+        return calibrator
+
+    def generate_calibrated(self, result, method: str = "v2", name_suffix: str = "_CAL"):
+        sim_artifact = result["simulation_artifacts"]
+        metric = sim_artifact["metric"]
+        from autogluon.core.metrics import get_metric
+        problem_type = sim_artifact["problem_type_transform"]
+        ag_metric = get_metric(metric=metric, problem_type=problem_type)
+        y_test = sim_artifact["y_test"]
+
+        y_val = sim_artifact["y_val"]
+        y_pred_proba_val = sim_artifact["pred_val"]
+        calibrator = self.temp_scale(y_val=y_val, y_pred_proba_val=y_pred_proba_val, method=method)
+        y_pred_proba_test = sim_artifact["pred_test"]
+        y_pred_proba_test_scaled = calibrator.predict_proba(y_pred_proba_test)
+        y_pred_proba_val_scaled = calibrator.predict_proba(y_pred_proba_val)
+
+        # metric_error_og = ag_metric.error(y_test, y_pred_proba_test)
+        metric_error_cal = ag_metric.error(y_test, y_pred_proba_test_scaled)
+        metric_error_val_og = ag_metric.error(y_val, y_pred_proba_val)
+        metric_error_val_cal = ag_metric.error(y_val, y_pred_proba_val_scaled)
+
+        if metric_error_val_cal > metric_error_val_og:
+            print(f"WARNING:")
+            print(metric_error_val_cal, metric_error_val_og)
+            print(result["framework"], result["dataset"], result["fold"])
+
+        result_calibrated = copy.deepcopy(result)
+        result_calibrated["metric_error"] = metric_error_cal
+        result_calibrated["metric_error_val"] = metric_error_val_cal
+        result_calibrated["simulation_artifacts"]["pred_test"] = y_pred_proba_test_scaled
+        result_calibrated["simulation_artifacts"]["pred_val"] = y_pred_proba_val_scaled
+        result_calibrated["framework"] = result_calibrated["framework"] + name_suffix
+        # FIXME: Fix bag children? Should they be calibrated?
+
+        return result_calibrated
+
+    def _align_result_input_format(self, result: dict | BaselineResult) -> BaselineResult:
+        """
+        Converts results in old format to new format
+        Keeps results in new format as-is.
+
+        This enables the use of results in the old format alongside results in the new format.
+
+        Parameters
+        ----------
+        result
+
+        Returns
+        -------
+
+        """
+        if isinstance(result, BaselineResult):
+            return result
+        assert isinstance(result, dict)
+        result_cls = BaselineResult
+        sim_artifacts = result.get("simulation_artifacts", None)
+        if sim_artifacts is not None:
+            assert isinstance(sim_artifacts, dict)
+            dataset = result["dataset"]
+            fold = result["fold"]
+            result_cls = ConfigResult
+            if list(sim_artifacts.keys()) == [dataset]:
+                sim_artifacts = sim_artifacts[dataset][fold]
+            bag_info = sim_artifacts.get("bag_info", None)
+            if bag_info is not None:
+                assert isinstance(bag_info, dict)
+                result_cls = AGBagResult
+        result_obj = result_cls(result=result, convert_format=True, inplace=False)
+        return result_obj
+
+    def _calibrate(self, result: ConfigResult) -> ConfigResult:
+        problem_type = result.result["problem_type"]
+        if problem_type == "multiclass":
+            # FIXME: What about binary?
+            result_calibrated = result.generate_calibrated(method="v2", name_suffix="_CAL")
+        else:
+            result_calibrated = copy.deepcopy(result)
+            result_calibrated.result["framework"] = result_calibrated.result["framework"] + "_CAL"
+        return result_calibrated
+
+    def repo_from_results(
+        self,
+        results_lst: list[dict[str, Any] | BaselineResult],
+        calibrate: bool = False,
+        include_holdout: bool = False,
+        convert_time_infer_s_from_batch_to_sample: bool = True,  # FIXME: Remove this, it should be False eventually
+    ) -> EvaluationRepository:
+        results_lst: list[BaselineResult] = [self._align_result_input_format(result) for result in results_lst]
+
+        results_configs: list[ConfigResult] = []
+        results_baselines: list[BaselineResult] = []
+        for result in results_lst:
+            if isinstance(result, ConfigResult):
+                results_configs.append(result)
+            else:
+                results_baselines.append(result)
+
+        n_configs = len(results_configs)
+        if calibrate:
+            results_configs_calibrated = []
+            for i, result in enumerate(results_configs):
+                if i % 100 == 0:
+                    print(f"Calibrating: {i + 1}/{n_configs}\t{result.framework}")
+                results_configs_calibrated.append(self._calibrate(result=result))
+            results_configs += results_configs_calibrated
+
+        n_configs = len(results_configs)
+        if include_holdout:
+            for r_i, result in enumerate(results_configs):
+                if isinstance(result, AGBagResult):
+                    if r_i % 100 == 0:
+                        print(f"Generating Holdout Results: {r_i + 1}/{n_configs}\t{result.framework}")
+                    results_new: list[BaselineResult] = result.bag_artifacts()
+                    results_baselines += results_new
+
+        results_lst_df = [result.compute_df_result() for result in results_configs]
+        results_lst_df_baselines = [result.compute_df_result() for result in results_baselines]
+        df_configs = pd.concat(results_lst_df, ignore_index=True) if results_lst_df else None
+        df_baselines = pd.concat(results_lst_df_baselines, ignore_index=True) if results_lst_df_baselines else None
+
+        if df_configs is not None and convert_time_infer_s_from_batch_to_sample:
+            df_configs = _convert_time_infer_s_from_batch_to_sample(df=df_configs, task_metadata=self.task_metadata)
+        if df_baselines is not None and convert_time_infer_s_from_batch_to_sample:
+            df_baselines = _convert_time_infer_s_from_batch_to_sample(df=df_baselines, task_metadata=self.task_metadata)
+
+        configs_hyperparameters = self.get_configs_hyperparameters(results_configs=results_configs)
+        results_lst_simulation_artifacts = [result.generate_old_sim_artifact() for result in results_configs]

        # TODO: per-fold pred_proba_test and pred_proba_val (indices?)
        repo: EvaluationRepository = EvaluationRepository.from_raw(
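
For orientation, here is a minimal standalone sketch of the temperature-scaling step introduced above, kept outside the diff. It assumes only the calibrator interface used in temp_scale (fit(X=..., y=...) and predict_proba(...)); the labels, probabilities, and hyperparameter values are illustrative, not taken from the commit.

# Illustrative sketch (not part of the commit): fit a temperature-scaling calibrator
# on validation probabilities, then rescale probabilities with it.
import numpy as np
from tabrepo.utils.temp_scaling.calibrators import TemperatureScalingCalibrator

y_val = np.array([0, 2, 1, 2])           # hypothetical validation labels
y_pred_proba_val = np.array([            # hypothetical uncalibrated class probabilities
    [0.7, 0.2, 0.1],
    [0.1, 0.3, 0.6],
    [0.2, 0.6, 0.2],
    [0.3, 0.3, 0.4],
])

calibrator = TemperatureScalingCalibrator(max_iter=200, lr=0.1)
calibrator.fit(X=y_pred_proba_val, y=y_val)                     # same call pattern as temp_scale above
y_pred_proba_val_scaled = calibrator.predict_proba(y_pred_proba_val)
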
@@ -249,25 +371,12 @@ def repo_from_results(
        return repo

-    def get_configs_hyperparameters(self, results_lst: list[dict]) -> dict | None:
+    def get_configs_hyperparameters(self, results_configs: list[ConfigResult]) -> dict | None:
        configs_hyperparameters = {}
-        for result in results_lst:
-            if "method_metadata" in result and "model_hyperparameters" in result["method_metadata"]:
-                method_name = result["framework"]
-                if method_name in configs_hyperparameters:
-                    continue
-                method_metadata = result["method_metadata"]
-                model_hyperparameters = method_metadata["model_hyperparameters"]
-                model_cls = method_metadata.get("model_cls", None)
-                model_type = method_metadata.get("model_type", None)
-                name_prefix = method_metadata.get("name_prefix", None)
-
-                configs_hyperparameters[method_name] = dict(
-                    model_cls=model_cls,
-                    model_type=model_type,
-                    name_prefix=name_prefix,
-                    hyperparameters=model_hyperparameters,
-                )
+        for result in results_configs:
+            if result.framework in configs_hyperparameters:
+                continue
+            configs_hyperparameters[result.framework] = result.hyperparameters
        if not configs_hyperparameters:
            configs_hyperparameters = None
        return configs_hyperparameters
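
To round out the new entry point, a hedged usage sketch of the reworked repo_from_results. The runner instance name and the results loader are hypothetical placeholders; the keyword arguments and the EvaluationRepository return type come from the diff above.

# Hypothetical usage sketch: build an EvaluationRepository from raw experiment results,
# adding temperature-scaled "_CAL" config variants and per-bag holdout baselines.
from tabrepo import EvaluationRepository

results_lst = load_results_somehow()  # placeholder: list[dict | BaselineResult] produced by the experiment runner

repo: EvaluationRepository = exp_batch_runner.repo_from_results(  # exp_batch_runner: instance of the class edited above
    results_lst=results_lst,
    calibrate=True,          # appends "<framework>_CAL" configs via _calibrate
    include_holdout=True,    # appends holdout results for AGBagResult configs via bag_artifacts()
)
print(repo.configs())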