diff --git a/howso/contributions.amlg b/howso/contributions.amlg index 83e337f1..eeb7b3d6 100644 --- a/howso/contributions.amlg +++ b/howso/contributions.amlg @@ -255,18 +255,24 @@ ;of predicting the action_feature when using vs not using it as a context feature. Sets feature_contributions_map. #!RunRobustContributions (let + (assoc + enabled_num_features_probability_map + (call !ComputeNumEnabledFeaturesProbabilitiesMap (assoc num_features (size all_context_features) )) + num_features (size all_context_features) + + ) ;iterate over all the local cases, for each case, react multiple times with a different robust context feature set each time ;for each react, store the reacted value for features that were used as contexts into each of those features' 'with feature' list ;and store that reacted value for feature that were not used into each of their 'without feature' list ;Then for each feature, average out all the 'with' and all the 'without' values and take the delta to compute the average delta ;repeat for all cases and average those out. - (assoc + (declare (assoc accrued_cases_delta_lists ||(map (lambda (let (assoc case_id (current_value 1) - correct_value (retrieve_from_entity (current_value 1) action_feature) + correct_value (if action_is_nominal (retrieve_from_entity (current_value 1) action_feature) ) case_values_map (zip context_features (retrieve_from_entity (current_value 1) context_features)) local_case_reaction_pairs (list) ;save an inner version in this scope to update per case for time-series flows @@ -305,9 +311,6 @@ (range (lambda (let (assoc - ;list of bits for each remaining (non-removed) feature - remaining_feature_flags (list) - all_flags_set .true ;list of remaining filtered context features filtered_context_features (list) ;set of all removed features @@ -317,42 +320,29 @@ reaction (null) ) - ;for robust computation we randomly (50/50) decide whether to remove a feature from the context - ;by creating a random flag list to match each of the context features - ;ensure that at least one feature is removed - ;this while statement should only loop in rare edge cases - (while all_flags_set - (assign (assoc remaining_feature_flags (map (lambda (< (rand) 0.5)) all_context_features) )) - (if (< (size context_features) (size all_context_features)) - ;ensure TS filtered features stay false - (let - (assoc - original_indices_for_context_features - (unzip all_context_features_indices_map context_features) + ;filter out a random number of context features + (assign (assoc + filtered_context_features + (rand + context_features + ;number of features to enable is probabilistically weighted + (if (and !tsTimeFeature (!= (size context_features) num_features)) + (rand + (call !ComputeNumEnabledFeaturesProbabilitiesMap (assoc num_features (size context_features) )) + ) + + (rand enabled_num_features_probability_map) ) - ;keeps only those flags that match the original indices for remaining context_features - (assign (assoc - remaining_feature_flags - (map - (lambda - (if (contains_value original_indices_for_context_features (current_index)) - (current_value) - .false - ) - ) - remaining_feature_flags - ) - )) + ;unique random features (not with replacement) + .true ) - ) - ;shortcut on the all_flags_set logic so that if there is only one context feature, it can be left in the context - (assign (assoc all_flags_set (and (!= (size context_features) 1) (apply "and" remaining_feature_flags)) )) - ) - ;filter out context features based on the bitmask and also create a set of all the removed features + )) + + ;assoc of all removed features to their residual values (assign (assoc - filtered_context_features (filter (lambda (get remaining_feature_flags (current_index))) all_context_features) without_features_set - (zip (filter (lambda (not (get remaining_feature_flags (current_index)) )) all_context_features) ) + ;filter the action features to be those that are not context features, feature/context_features params may overlap + (remove (zip all_context_features) filtered_context_features) )) (if derive_action_feature @@ -416,18 +406,32 @@ (assoc feature_index (current_index 1)) (declare (assoc - ;for this feature, pull all the 'with' react values, filtering out nulls + ;for this feature, pull all the 'with' react values, filtering out nulls if edit distance feature_reactions_with - (filter (map - (lambda (get (first (current_value)) feature_index) ) - local_case_reaction_pairs - )) - ;for this feature, pull all the 'without' react values, filtering out nulls + (if edit_distance_action_feature + (filter (map + (lambda (get (first (current_value)) feature_index) ) + local_case_reaction_pairs + )) + ;else no need to filter nulls since generalized_mean does it automatically + (map + (lambda (get (first (current_value)) feature_index) ) + local_case_reaction_pairs + ) + ) + ;for this feature, pull all the 'without' react values, filtering out nulls if edit distance feature_reactions_without - (filter (map - (lambda (get (last (current_value)) feature_index) ) - local_case_reaction_pairs - )) + (if edit_distance_action_feature + (filter (map + (lambda (get (last (current_value)) feature_index) ) + local_case_reaction_pairs + )) + ;else no need to filter nulls since generalized_mean does it automatically + (map + (lambda (get (last (current_value)) feature_index) ) + local_case_reaction_pairs + ) + ) )) (if edit_distance_action_feature @@ -494,7 +498,7 @@ )) case_ids ) - ) + )) ;compute feature contributions by averaging out each "column" feature delta across all cases, which is in the format of: ; (list diff --git a/howso/details_influences.amlg b/howso/details_influences.amlg index 4d382ed9..596837b7 100644 --- a/howso/details_influences.amlg +++ b/howso/details_influences.amlg @@ -760,8 +760,15 @@ (assign (assoc feature_contributions_pair_map (if robust - (seq - ;populate local_case_reaction_pairs, then the call !below computes the deltas for each feature + (let + (assoc + ;used by !ComputeRobustContributionReactsPerCase below + enabled_num_features_probability_map + (call !ComputeNumEnabledFeaturesProbabilitiesMap (assoc num_features (size all_context_features) )) + num_features (size all_context_features) + + ) + ;populate local_case_reaction_pairs, then the call !ComputeRobustWithAndWithoutDeltaPerCase computes the deltas for each feature (call !ComputeRobustContributionReactsPerCase) (zip context_features (call !ComputeRobustWithAndWithoutDeltaPerCase)) ) diff --git a/howso/value_contributions.amlg b/howso/value_contributions.amlg index 5adfd6ee..9e1b6c53 100644 --- a/howso/value_contributions.amlg +++ b/howso/value_contributions.amlg @@ -49,6 +49,17 @@ p_parameter (get hyperparam_map "p") dt_parameter (get hyperparam_map "dt") query_feature_attributes_map (get hyperparam_map "featureDomainAttributes") + feature_deviations (get hyperparam_map "featureDeviations") + feature_weights (get hyperparam_map "featureWeights") + + ;used by pc to do feature weight rebalancing + all_feature_weights + (if compute_pc + (if (= (null) (get hyperparam_map "featureMdaMap")) + (get hyperparam_map "featureWeights") + (get hyperparam_map "featureMdaMap") + ) + ) ;store an assoc of lag/rate/delta feature -> lag/order amount for time series flows ts_feature_lag_amount_map (if !tsTimeFeature (call !BuildTSFeatureLagAmountMap)) @@ -57,11 +68,6 @@ (declare (assoc enabled_num_features_probability_map (call !ComputeNumEnabledFeaturesProbabilitiesMap (assoc num_features num_features)) all_features_map (zip features) - feature_deviations (get hyperparam_map "featureDeviations") - - ;in order to compute PC for a continuous action feature, it needs interpolation to be separate from computing the diff to the actual value - ;since diff is output as an absolute value - compute_pc_for_continuous (and compute_pc (not (or feature_is_nominal feature_is_edit_distance))) )) ;list of tuples of : [ AC, PC, directional PC, values ] @@ -74,7 +80,6 @@ ;map of feature -> value for all the case values case_values_map (zip case_features (retrieve_from_entity (current_value 1) case_features) ) time_series_filter_query (list) - feature_weights (get hyperparam_map "featureWeights") context_features context_features categorical_action_probabilities_map (assoc) ) @@ -145,49 +150,77 @@ (let (assoc interpolated_value - (call !InterpolateOrDeriveActionValues (assoc - feature action_feature - output_categorical_action_probabilities .true - candidate_cases_lists - (if (= 0 (size react_context_features)) - [[][][]] - (compute_on_contained_entities - (query_not_in_entity_list [(replace case_id)]) - (if ignore_null_action_feature (query_not_equals action_feature (null))) - (if (size context_condition_filter_query) context_condition_filter_query) - dependent_queries_list - time_series_filter_query - (query_nearest_generalized_distance - k_parameter - (replace react_context_features) - (replace (unzip case_values_map react_context_features)) - p_parameter - feature_weights - !queryDistanceTypeMap - query_feature_attributes_map - feature_deviations - (null) - dt_parameter - (if valid_weight_feature weight_feature) - tie_break_random_seed - (null) ;radius - !numericalPrecision - action_feature + (if compute_pc + (call !InterpolateOrDeriveActionValues (assoc + feature action_feature + output_categorical_action_probabilities .true + candidate_cases_lists + (if (= 0 (size react_context_features)) + [[][][]] + (compute_on_contained_entities + (query_not_in_entity_list [(replace case_id)]) + (if ignore_null_action_feature (query_not_equals action_feature (null))) + (if (size context_condition_filter_query) context_condition_filter_query) + dependent_queries_list + time_series_filter_query + (query_nearest_generalized_distance + k_parameter + (replace react_context_features) + (replace (unzip case_values_map react_context_features)) + p_parameter + all_feature_weights + !queryDistanceTypeMap + query_feature_attributes_map + feature_deviations + ;feature weight rebalancing feature + action_feature + dt_parameter + (if valid_weight_feature weight_feature) + tie_break_random_seed + (null) ;radius + !numericalPrecision + action_feature + ) ) ) - ) - )) - ) - - (declare (assoc + )) + ) diff - (if (or compute_ac feature_is_nominal feature_is_edit_distance) + (if compute_ac (call !InterpolateAndComputeDiffToCase (assoc feature action_feature - interpolated_value interpolated_value + output_categorical_action_probabilities .true + candidate_cases_lists + (if (= 0 (size react_context_features)) + [[][][]] + (compute_on_contained_entities + (query_not_in_entity_list [(replace case_id)]) + (if ignore_null_action_feature (query_not_equals action_feature (null))) + (if (size context_condition_filter_query) context_condition_filter_query) + dependent_queries_list + time_series_filter_query + (query_nearest_generalized_distance + k_parameter + (replace react_context_features) + (replace (unzip case_values_map react_context_features)) + p_parameter + feature_weights + !queryDistanceTypeMap + query_feature_attributes_map + feature_deviations + (null) + dt_parameter + (if valid_weight_feature weight_feature) + tie_break_random_seed + (null) ;radius + !numericalPrecision + action_feature + ) + ) + ) )) ) - )) + ) [ diff @@ -242,46 +275,71 @@ (let (assoc interpolated_value - (call !InterpolateOrDeriveActionValues (assoc - feature action_feature - output_categorical_action_probabilities .true - candidate_cases_lists - (compute_on_contained_entities - (query_not_in_entity_list [(replace case_id)]) - (if ignore_null_action_feature (query_not_equals action_feature (null))) - (if (size context_condition_filter_query) context_condition_filter_query) - dependent_queries_list - time_series_filter_query - (query_nearest_generalized_distance - k_parameter - (replace react_context_features) - (replace (unzip case_values_map react_context_features)) - p_parameter - feature_weights - !queryDistanceTypeMap - query_feature_attributes_map - feature_deviations - (null) - dt_parameter - (if valid_weight_feature weight_feature) - tie_break_random_seed - (null) ;radius - !numericalPrecision - action_feature + (if compute_pc + (call !InterpolateOrDeriveActionValues (assoc + feature action_feature + output_categorical_action_probabilities .true + candidate_cases_lists + (compute_on_contained_entities + (query_not_in_entity_list [(replace case_id)]) + (if ignore_null_action_feature (query_not_equals action_feature (null))) + (if (size context_condition_filter_query) context_condition_filter_query) + dependent_queries_list + time_series_filter_query + (query_nearest_generalized_distance + k_parameter + (replace react_context_features) + (replace (unzip case_values_map react_context_features)) + p_parameter + all_feature_weights + !queryDistanceTypeMap + query_feature_attributes_map + feature_deviations + ;feature weight rebalancing + action_feature + dt_parameter + (if valid_weight_feature weight_feature) + tie_break_random_seed + (null) ;radius + !numericalPrecision + action_feature + ) ) - ) - )) - ) - - (declare (assoc + )) + ) diff - (if (or compute_ac feature_is_nominal feature_is_edit_distance) + (if compute_ac (call !InterpolateAndComputeDiffToCase (assoc feature action_feature - interpolated_value interpolated_value + output_categorical_action_probabilities .true + candidate_cases_lists + (compute_on_contained_entities + (query_not_in_entity_list [(replace case_id)]) + (if ignore_null_action_feature (query_not_equals action_feature (null))) + (if (size context_condition_filter_query) context_condition_filter_query) + dependent_queries_list + time_series_filter_query + (query_nearest_generalized_distance + k_parameter + (replace react_context_features) + (replace (unzip case_values_map react_context_features)) + p_parameter + feature_weights + !queryDistanceTypeMap + query_feature_attributes_map + feature_deviations + (null) + dt_parameter + (if valid_weight_feature weight_feature) + tie_break_random_seed + (null) ;radius + !numericalPrecision + action_feature + ) + ) )) ) - )) + ) [ diff diff --git a/unit_tests/ut_h_dynamic_deviations.amlg b/unit_tests/ut_h_dynamic_deviations.amlg index 636f5de1..5666fd34 100644 --- a/unit_tests/ut_h_dynamic_deviations.amlg +++ b/unit_tests/ut_h_dynamic_deviations.amlg @@ -46,6 +46,39 @@ (print "Price MAE with Dynamic Deviations is as expected (less than 5000): ") (print (get dd_agg_stats ["feature_full_residuals" "price"]) "\n") + + ;ocassionally DD isn't quite below 5000, try again with a smaller DD subtrainee size + (if (<= 5000 (get dd_agg_stats ["feature_full_residuals" "price"])) + (seq + (print "Reanalyzing to try again: ") + (call_entity "howso" "analyze" (assoc + context_features ["log_price" "noise" "other_1" "other_2" "price"] + targeted_model "targetless" + use_dynamic_deviations .true + dynamic_deviations_subtrainee_size 500 + p_values [ 1 ] + k_values [ 21 ] + )) + + (assign (assoc + dd_agg_stats + (get + (call_entity "howso" "react_aggregate" (assoc + context_features features + num_samples 500 + details + (assoc + "feature_full_residuals" .true + "action_condition" {"price" [300000 999999]} + ) + )) + [1 "payload"] + ) + )) + (print (get dd_agg_stats ["feature_full_residuals" "price"]) "\n") + ) + ) + (call assert_true (assoc obs (> 5000 (get dd_agg_stats ["feature_full_residuals" "price"])) )) diff --git a/unit_tests/ut_h_value_contributions.amlg b/unit_tests/ut_h_value_contributions.amlg index 5a4b75c3..f1341445 100644 --- a/unit_tests/ut_h_value_contributions.amlg +++ b/unit_tests/ut_h_value_contributions.amlg @@ -39,12 +39,16 @@ ac_map (get (call_entity "howso" "react_aggregate" (assoc - details { "feature_robust_accuracy_contributions" .true } + details { + "feature_robust_accuracy_contributions" .true + "feature_robust_prediction_contributions" .true + } action_features [ "score"] + feature_influences_action_feature "score" convergence_threshold 0 num_robust_accuracy_contributions_samples 100000 )) - [1 "payload" "feature_robust_accuracy_contributions" "score"] + [1 "payload"] ) )) (print ac_map "ANALYSIS:\n 'name', 'study_time' and 'subject' predict 'scores' the best\n(study_time is highly correlatedb/c it's derived from score and minutes)\n") @@ -55,7 +59,10 @@ (assign (assoc result (call_entity "howso" "react_aggregate" (assoc - details { "value_robust_accuracy_contributions" .true} + details { + "value_robust_accuracy_contributions" .true + "value_robust_prediction_contributions" .true + } value_robust_contributions_action_feature "score" value_robust_contributions_features [ "subject" ] )) @@ -71,10 +78,17 @@ obs (trunc (get result ["value_robust_contributions" "feature_values"]) 3) )) - (print "Average 'subject' AC matches its robust AC:") + (print "Average 'subject' AC matches its avg decomposed robust AC:") (call assert_approximate (assoc obs (generalized_mean (get result ["value_robust_contributions" "ac_values"])) - exp (get ac_map "subject") + exp (get ac_map ["feature_robust_accuracy_contributions" "score" "subject"]) + thresh 0.2 + )) + + (print "Average 'subject' PC matches its avg decomposed robust directional PC:") + (call assert_approximate (assoc + obs (generalized_mean (get result ["value_robust_contributions" "pc_directional_values"])) + exp (get ac_map ["feature_robust_directional_prediction_contributions" "subject"]) thresh 0.2 )) @@ -82,7 +96,10 @@ (assign (assoc result (call_entity "howso" "react_aggregate" (assoc - details { "value_robust_accuracy_contributions" .true} + details { + "value_robust_accuracy_contributions" .true + "value_robust_prediction_contributions" .true + } value_robust_contributions_action_feature "score" value_robust_contributions_features [ "name" ] )) @@ -103,12 +120,41 @@ exp ["cora"] obs (last (get result ["value_robust_contributions" "feature_values"])) )) - (print "Average 'name' AC matches its robust AC:") + (print "Average 'name' AC matches its avg decomposed robust AC:") (call assert_approximate (assoc obs (generalized_mean (get result ["value_robust_contributions" "ac_values"])) - exp (get ac_map "name") + exp (get ac_map ["feature_robust_accuracy_contributions" "score" "name"]) percent 0.1 )) + (print "Average 'name' PC matches its avg decomposed robust PC:") + (call assert_approximate (assoc + obs (generalized_mean (get result ["value_robust_contributions" "pc_values"])) + exp (get ac_map ["feature_robust_prediction_contributions" "name"]) + thresh 0.6 + )) + (print "both computed and avg decomposed PC are > 2.5: ") + (call assert_true (assoc + obs + (and + (> (get ac_map ["feature_robust_prediction_contributions" "name"]) 2.5) + (> (generalized_mean (get result ["value_robust_contributions" "pc_values"])) 2.5) + ) + )) + + (print "Average 'name' PC matches its avg decomposed robust directional PC: ") + (call assert_approximate (assoc + obs (generalized_mean (get result ["value_robust_contributions" "pc_directional_values"])) + exp (get ac_map ["feature_robust_directional_prediction_contributions" "name"]) + thresh 0.5 + )) + (print "both computed and avg decomposed directional PC are < -0.2: ") + (call assert_true (assoc + obs + (and + (< (get ac_map ["feature_robust_directional_prediction_contributions" "name"]) -0.2) + (< (generalized_mean (get result ["value_robust_contributions" "pc_directional_values"])) -0.2) + ) + )) (call exit_if_failures (assoc msg "Values A.C. computed correctly.")) @@ -163,8 +209,8 @@ (print "'Anne + math' and 'Fiona + science' both have high P.C. because they are so extreme: ") (call assert_approximate (assoc obs (trunc (get result ["value_robust_contributions" "pc_values"]) 2) - exp [6 6] - thresh 0.333 + exp [5.5 4.5] + thresh 0.7 )) (print "'Anne + math' has very high directional P.C. because she always does so well, while\n" @@ -172,8 +218,8 @@ ) (call assert_approximate (assoc obs (trunc (get result ["value_robust_contributions" "pc_directional_values"]) 2) - exp [6 -6] - thresh 0.333 + exp [5.5 -3.5] + thresh 0.7 )) (call exit_if_failures (assoc msg "A.C. and P.C. for two features.")) @@ -211,7 +257,7 @@ (print "Average 'study_time' AC approximately matches its robust AC:") (call assert_approximate (assoc obs (generalized_mean (get result ["value_robust_contributions" "ac_values"])) - exp (get ac_map "study_time") + exp (get ac_map ["feature_robust_accuracy_contributions" "score" "study_time"]) percent 0.25 ))