2020 resolve_behavior_predictions ,
2121)
2222
# Recommended defaults reported to callers next to a policy comparison.

# Estimator suggested for general use.
RECOMMENDED_ESTIMATOR = "dr"
# Propensity-source mode suggested when logged propensities are available.
RECOMMENDED_PROPENSITY_SOURCE_WITH_LOGGED = "auto"
# Propensity-source mode suggested when logged propensities are unavailable.
RECOMMENDED_PROPENSITY_SOURCE_FALLBACK = "estimated"
# Estimators for which cross-fitting is recommended; frozen so the shared
# module-level set cannot be mutated by accident.
RECOMMENDED_CROSSFIT_ESTIMATORS = frozenset({"dm", "dr", "sndr", "switch_dr"})
27+
2328
2429@dataclass (frozen = True )
2530class PolicyValueResult :
@@ -47,6 +52,12 @@ class PolicyComparisonSummary:
4752 inference_warnings : tuple [str , ...] = field (default_factory = tuple )
4853 diagnostics : PolicyDiagnostics | None = None
4954 notes : tuple [str , ...] = field (default_factory = tuple )
55+ info_notes : tuple [str , ...] = field (default_factory = tuple )
56+ diagnostic_warnings : tuple [str , ...] = field (default_factory = tuple )
57+ trust_notes : tuple [str , ...] = field (default_factory = tuple )
58+ trust_level : str = "ok"
59+ recommendation : Optional [str ] = None
60+ recommended_defaults : dict [str , object ] = field (default_factory = dict )
5061 propensity_source : Optional [str ] = None
5162 propensity_column : Optional [str ] = None
5263 nuisance_diagnostics : Optional [NuisanceDiagnostics ] = None
@@ -60,7 +71,15 @@ def to_dict(self) -> dict:
6071 "Delta" : self .delta ,
6172 "diagnostics" : self .diagnostics .to_dict () if self .diagnostics is not None else {},
6273 "notes" : list (self .notes ),
74+ "info_notes" : list (self .info_notes ),
75+ "diagnostic_warnings" : list (self .diagnostic_warnings ),
76+ "trust_notes" : list (self .trust_notes ),
77+ "trust_level" : self .trust_level ,
6378 }
79+ if self .recommendation is not None :
80+ out ["recommendation" ] = self .recommendation
81+ if self .recommended_defaults :
82+ out ["recommended_defaults" ] = self .recommended_defaults
6483 if self .v_a_ci is not None :
6584 out ["V_A_CI" ] = self .v_a_ci
6685 if self .v_b_ci is not None :
@@ -90,6 +109,49 @@ def to_dict(self) -> dict:
90109 return out
91110
92111
def _recommended_defaults(estimator: str) -> dict[str, object]:
    """Return the recommended-default settings reported with a comparison.

    Args:
        estimator: Name of the estimator used for the current comparison.

    Returns:
        A freshly built dict holding the module-level recommended estimator
        and propensity modes, plus a boolean telling whether ``estimator`` is
        a member of ``RECOMMENDED_CROSSFIT_ESTIMATORS``.
    """
    crossfit_recommended = estimator in RECOMMENDED_CROSSFIT_ESTIMATORS
    return {
        "preferred_estimator_general_use": RECOMMENDED_ESTIMATOR,
        "preferred_propensity_mode_when_logged_available": RECOMMENDED_PROPENSITY_SOURCE_WITH_LOGGED,
        "preferred_propensity_fallback_when_logged_unavailable": RECOMMENDED_PROPENSITY_SOURCE_FALLBACK,
        "crossfit_recommended_for_estimator": crossfit_recommended,
    }
119+
120+
def _build_trust_metadata(
    *,
    estimator: str,
    use_crossfit: bool,
    propensity_notes: tuple[str, ...],
    diagnostic_warnings: tuple[str, ...],
    inference_warnings: tuple[str, ...],
) -> tuple[tuple[str, ...], tuple[str, ...], str, Optional[str]]:
    """Derive informational notes, trust notes, a trust level and advice.

    A risk score is accumulated as follows: one point per diagnostic warning,
    one point per inference warning, and one extra point when any diagnostic
    warning is one of the importance-weight warnings (``low_ess_ratio``,
    ``heavy_weight_tail``, ``extreme_max_weight``).

    Args:
        estimator: Name of the estimator used for the comparison.
        use_crossfit: Whether cross-fitting was enabled for this run.
        propensity_notes: Notes about propensity handling; duplicates are
            dropped, first occurrence wins.
        diagnostic_warnings: Warning codes produced by the diagnostics.
        inference_warnings: Warning codes produced by the inference step.

    Returns:
        ``(info_notes, trust_notes, trust_level, recommendation)`` where
        ``trust_level`` is ``"elevated_concern"`` for a score of 3 or more,
        ``"caution"`` for a positive score below 3 and ``"ok"`` otherwise;
        ``recommendation`` is ``None`` when the level is ``"ok"``.
    """
    # dict.fromkeys removes duplicates while keeping first-seen order.
    info_notes = list(dict.fromkeys(propensity_notes))
    trust_notes: list[str] = []
    risk_score = 0

    if diagnostic_warnings:
        risk_score += len(diagnostic_warnings)
        trust_notes.append("diagnostics_warnings_present_review_weight_overlap_metrics")
    if inference_warnings:
        risk_score += len(inference_warnings)
        trust_notes.append("inference_warnings_present_ci_and_p_value_less_stable")

    # Missing cross-fitting is only informational; it does not raise the score.
    if estimator in RECOMMENDED_CROSSFIT_ESTIMATORS and not use_crossfit:
        info_notes.append("crossfit_optional_recommendation_for_bias_hardening")

    # Weight-instability warnings count once more on top of their own point.
    unstable_weight_codes = {"low_ess_ratio", "heavy_weight_tail", "extreme_max_weight"}
    if any(code in unstable_weight_codes for code in diagnostic_warnings):
        risk_score += 1
        trust_notes.append("trust_elevated_concern_unstable_importance_weights")

    if risk_score >= 3:
        return (
            tuple(info_notes),
            tuple(trust_notes),
            "elevated_concern",
            "Treat comparison as directional; improve overlap/weights or collect more representative logs.",
        )
    if risk_score > 0:
        return (
            tuple(info_notes),
            tuple(trust_notes),
            "caution",
            "Review diagnostics and inference warnings before making product decisions.",
        )
    return tuple(info_notes), tuple(trust_notes), "ok", None
153+
154+
93155@dataclass (frozen = True )
94156class MultiMetricComparisonResult :
95157 estimator : str
@@ -263,14 +325,29 @@ def point_on(part: pd.DataFrame) -> float:
263325 )
264326
265327 if not with_ci :
328+ diag_warnings = tuple (diag .warnings )
329+ info_notes , trust_notes , trust_level , recommendation = _build_trust_metadata (
330+ estimator = estimator ,
331+ use_crossfit = use_crossfit ,
332+ propensity_notes = propensity_notes ,
333+ diagnostic_warnings = diag_warnings ,
334+ inference_warnings = tuple (),
335+ )
336+ notes = tuple (dict .fromkeys (info_notes + diag_warnings + trust_notes ))
266337 return PolicyComparisonSummary (
267338 estimator = estimator ,
268339 target = target ,
269340 v_a = float (v_a ),
270341 v_b = float (v_b ),
271342 delta = float (v_b - v_a ),
272343 diagnostics = diag ,
273- notes = propensity_notes + tuple (diag .warnings ),
344+ notes = notes ,
345+ info_notes = info_notes ,
346+ diagnostic_warnings = diag_warnings ,
347+ trust_notes = trust_notes ,
348+ trust_level = trust_level ,
349+ recommendation = recommendation ,
350+ recommended_defaults = _recommended_defaults (estimator ),
274351 propensity_source = diag .propensity_source or resolved_source ,
275352 propensity_column = diag .propensity_column or resolved_propensity_col ,
276353 nuisance_diagnostics = nuisance_diag ,
@@ -291,7 +368,15 @@ def estimator_pair(part: pd.DataFrame):
291368 inference_warnings = tuple (inf .get ("inference_warnings" , []))
292369 if fallback_triggered ["value" ]:
293370 inference_warnings = inference_warnings + (external_nuisance_bootstrap_warning ,)
294- notes = propensity_notes + tuple (diag .warnings ) + inference_warnings
371+ diag_warnings = tuple (diag .warnings )
372+ info_notes , trust_notes , trust_level , recommendation = _build_trust_metadata (
373+ estimator = estimator ,
374+ use_crossfit = use_crossfit ,
375+ propensity_notes = propensity_notes ,
376+ diagnostic_warnings = diag_warnings ,
377+ inference_warnings = inference_warnings ,
378+ )
379+ notes = tuple (dict .fromkeys (info_notes + diag_warnings + inference_warnings + trust_notes ))
295380 return PolicyComparisonSummary (
296381 estimator = estimator ,
297382 target = target ,
@@ -310,6 +395,12 @@ def estimator_pair(part: pd.DataFrame):
310395 inference_warnings = inference_warnings ,
311396 diagnostics = diag ,
312397 notes = notes ,
398+ info_notes = info_notes ,
399+ diagnostic_warnings = diag_warnings ,
400+ trust_notes = trust_notes ,
401+ trust_level = trust_level ,
402+ recommendation = recommendation ,
403+ recommended_defaults = _recommended_defaults (estimator ),
313404 propensity_source = diag .propensity_source or resolved_source ,
314405 propensity_column = diag .propensity_column or resolved_propensity_col ,
315406 nuisance_diagnostics = nuisance_diag ,
0 commit comments