diff --git a/howso/feature_residuals.amlg b/howso/feature_residuals.amlg index d40e20c47..f6becf1d7 100644 --- a/howso/feature_residuals.amlg +++ b/howso/feature_residuals.amlg @@ -883,7 +883,7 @@ (if in_analyze (zip nominal_features - (map + ||(map (lambda (zip (lambda @@ -907,7 +907,7 @@ (zip nominal_features ;iterate over nominal features, where (current_value) is a list of react tuples for each feature - (map + ||(map (lambda (map (lambda @@ -969,211 +969,177 @@ ) )) - ;accuracy is: num correct predictions / all predictions - (assign (assoc - accuracy_map - (map - (lambda - (let - ;grab the current action feature's confusion matrix - (assoc confusion_matrix (get confusion_matrix_map (current_index 1))) - (if (size confusion_matrix) - (/ - ;correct predictions count - (apply "+" (values - ;get correct prediction count for each class - (map - (lambda (or (get (current_value) (current_index)))) - confusion_matrix - ) - )) - ;total predictions count - (apply "+" (values - ;add up all prediction counts for all classes - (map (lambda (apply "+" (filter (values (current_value))))) confusion_matrix) - )) - ) - ) - ) - ) - (zip nominal_features) - ) - )) - - ;recall for a single class is = TruePositives / (TruePositives + FalseNegatives) - (assign (assoc - recall_map - (map + ;accumulate nominal feature stats (accuracy, precision, recall, mcc) in one pass + (declare (assoc + nominal_stats_maps + ||(map (lambda (let ;grab the current action feature's confusion matrix (assoc confusion_matrix (get confusion_matrix_map (current_index 1))) (if (size confusion_matrix) - ;for each row in confusion matrix, average out: correct / total of row - (/ - (apply "+" - (values (map - (lambda (let - (assoc row_total (apply "+" (values (current_value 1))) ) - ;if there were no predictions at all for this class, prevent divide by zero - (if (> row_total 0) - (/ (or (get (current_value) (current_index)) 0) row_total) - 0 + (assoc + ;accuracy is: num correct predictions / all predictions + 
accuracy + (/ + ;correct predictions count + (apply "+" (values + ;get correct prediction count for each class + (map + (lambda (or (get (current_value) (current_index)))) + confusion_matrix ) )) - confusion_matrix - )) - ) - ;The 'confusion_matrix' may not be a square matrix. Divides by the length of the rows or columns - ; as if the matrix was square by taking the max of the number of non-empty rows or the number - ; of uniquely predicted column indices. - (max - ;Number of rows, filter out empty rows - (size - (filter (lambda (size (current_value))) confusion_matrix) + ;total predictions count + (apply "+" (values + ;add up all prediction counts for all classes + (map (lambda (apply "+" (filter (values (current_value))))) confusion_matrix) + )) ) - ;Number of unique columns - (size (values - (apply "append" - (map - (lambda (indices (current_value))) - (values confusion_matrix) - ) - ) - .true - )) - ) - ) - ) - )) - (zip nominal_features) - ) - )) - ;precision for a single class is = TruePositives / (TruePositives + FalsePositives) - (assign (assoc - precision_map - (map - (lambda - (let - ;grab the current action feature's confusion matrix - (assoc confusion_matrix (get confusion_matrix_map (current_index 1))) - (if (size confusion_matrix) - ;for each column in confusion matrix, average out: correct / total of column - (/ - ;correct predictions count - (apply "+" - (values (map - (lambda (let - (assoc - class (current_index 1) - column_total (null) - ) - (assign (assoc - column_total - (apply "+" - (values (map - (lambda (or (get (current_value) class) 0) ) - confusion_matrix - )) + ;recall for a single class is = TruePositives / (TruePositives + FalseNegatives) + recall + ;for each row in confusion matrix, average out: correct / total of row + (/ + (apply "+" + (values (map + (lambda (let + (assoc row_total (apply "+" (values (current_value 1))) ) + ;if there were no predictions at all for this class, prevent divide by zero + (if (> row_total 0) + (/ (or (get 
(current_value) (current_index)) 0) row_total) + 0 ) + )) + confusion_matrix )) - ;if there were no guesses for this class at all, prevent divide by zero - (if (> column_total 0) - (/ (or (get (current_value) class) 0) column_total) - 0 - ) - )) - confusion_matrix - )) - ) - ;The 'confusion_matrix' may not be a square matrix. Divides by the length of the rows or columns - ; as if the matrix was square by taking the max of the number of non-empty rows or the number - ; of uniquely predicted column indices. - (max - ;Number of rows, filter out empty rows - (size - (filter (lambda (size (current_value))) confusion_matrix) - ) - ;Number of unique columns - (size (values - (apply "append" - (map - (lambda (indices (current_value))) - (values confusion_matrix) + ) + ;The 'confusion_matrix' may not be a square matrix. Divides by the length of the rows or columns + ; as if the matrix was square by taking the max of the number of non-empty rows or the number + ; of uniquely predicted column indices. 
+ (max + ;Number of rows, filter out empty rows + (size + (filter (lambda (size (current_value))) confusion_matrix) ) + ;Number of unique columns + (size (values + (apply "append" + (map + (lambda (indices (current_value))) + (values confusion_matrix) + ) + ) + .true + )) ) - .true - )) - ) - ) - ) - ) - ) - (zip nominal_features) - ) - )) + ) - ; Generalized multi-class formula for Matthews Correlation Coefficient (mcc) is located https://en.wikipedia.org/wiki/Phi_coefficient#Multiclass_case - (assign (assoc - mcc_map - (map - (lambda - (let - ;grab the current action feature's confusion matrix - (assoc confusion_matrix (get confusion_matrix_map (current_index 1))) - (if (size confusion_matrix) - (let - (assoc - ;get the total correctly predicted counts, variable c in the mcc formula - total_predicted_correct - (apply "+" (values - (map (lambda (or (get (current_value) (current_index))) ) confusion_matrix) - )) - ;get the total number of samples, variable s in the mcc formula - total_samples - (apply "+" (values - (map (lambda (apply "+" (filter (values (current_value))))) confusion_matrix) - )) - ;get a list containing the true counts of each class, vector t in the mcc formula - true_counts - (append (values - (map - (lambda (apply "+" (filter (values (get confusion_matrix (current_index)))))) + ;precision for a single class is = TruePositives / (TruePositives + FalsePositives) + precision + ;for each column in confusion matrix, average out: correct / total of column + (/ + ;correct predictions count + (apply "+" + (values (map + (lambda (let + (assoc + class (current_index 1) + column_total (null) + ) + (assign (assoc + column_total + (apply "+" + (values (map + (lambda (or (get (current_value) class) 0) ) + confusion_matrix + )) + ) + )) + ;if there were no guesses for this class at all, prevent divide by zero + (if (> column_total 0) + (/ (or (get (current_value) class) 0) column_total) + 0 + ) + )) confusion_matrix + )) + ) + ;The 'confusion_matrix' may not 
be a square matrix. Divides by the length of the rows or columns + ; as if the matrix was square by taking the max of the number of non-empty rows or the number + ; of uniquely predicted column indices. + (max + ;Number of rows, filter out empty rows + (size + (filter (lambda (size (current_value))) confusion_matrix) ) - )) - ;get a list containing the predicted counts of each class, vector p in the mcc formula - predicted_counts - (append (values - (map - (lambda - (apply "+" (values (map - ;current_index 1 is the predicted class that is being aggregated - (lambda (or (get (current_value) (current_index 1))) 0) + ;Number of unique columns + (size (values + (apply "append" + (map + (lambda (indices (current_value))) + (values confusion_matrix) + ) + ) + .true + )) + ) + ) + + ;generalized multi-class formula for Matthews Correlation Coefficient (mcc) is located https://en.wikipedia.org/wiki/Phi_coefficient#Multiclass_case + mcc + (let + (assoc + ;get the total correctly predicted counts, variable c in the mcc formula + total_predicted_correct + (apply "+" (values + (map (lambda (or (get (current_value) (current_index))) ) confusion_matrix) + )) + ;get the total number of samples, variable s in the mcc formula + total_samples + (apply "+" (values + (map (lambda (apply "+" (filter (values (current_value))))) confusion_matrix) + )) + ;get a list containing the true counts of each class, vector t in the mcc formula + true_counts + (append (values + (map + (lambda (apply "+" (filter (values (get confusion_matrix (current_index)))))) + confusion_matrix + ) + )) + ;get a list containing the predicted counts of each class, vector p in the mcc formula + predicted_counts + (append (values + (map + (lambda + (apply "+" (values (map + ;current_index 1 is the predicted class that is being aggregated + (lambda (or (get (current_value) (current_index 1))) 0) + confusion_matrix + ))) + ) confusion_matrix - ))) + ) + )) + ) + + ;calculates the mcc + (declare (assoc + mcc_numerator 
+ (- (* total_predicted_correct total_samples) (dot_product true_counts predicted_counts)) + mcc_denominator + (* + (sqrt (- (pow total_samples 2) (dot_product true_counts true_counts))) + (sqrt (- (pow total_samples 2) (dot_product predicted_counts predicted_counts))) ) - confusion_matrix - ) )) - ) - ;calculates the mcc - (declare (assoc - mcc_numerator - (- (* total_predicted_correct total_samples) (dot_product true_counts predicted_counts)) - mcc_denominator - (* - (sqrt (- (pow total_samples 2) (dot_product true_counts true_counts))) - (sqrt (- (pow total_samples 2) (dot_product predicted_counts predicted_counts))) + (if (or (= mcc_numerator 0) (= mcc_denominator 0)) + 0 + (/ mcc_numerator mcc_denominator) ) - )) - - (if (or (= mcc_numerator 0) (= mcc_denominator 0)) - 0 - (/ mcc_numerator mcc_denominator) - ) + ) ) ) ) @@ -1182,6 +1148,13 @@ ) )) + (assign (assoc + accuracy_map (map (lambda (get (current_value) "accuracy")) nominal_stats_maps) + precision_map (map (lambda (get (current_value) "precision")) nominal_stats_maps) + recall_map (map (lambda (get (current_value) "recall")) nominal_stats_maps) + mcc_map (map (lambda (get (current_value) "mcc")) nominal_stats_maps) + )) + (if (= (null) param_path) (declare (assoc param_path (get hyperparam_map "paramPath"))) ) @@ -1211,7 +1184,6 @@ ) )) - ;copy over all the stats for non-primary shared features (if use_shared_deviations (map diff --git a/howso/react_aggregate.amlg b/howso/react_aggregate.amlg index be6fce1c6..d1c36c4d7 100644 --- a/howso/react_aggregate.amlg +++ b/howso/react_aggregate.amlg @@ -118,11 +118,11 @@ confusion_matrix_min_count 15 ;{type "list" values "string"} ;features for which to determine classes or values that cause the highest accuracy and/or prediction contribution to the - ;specified 'value_robust_contributions_action_features' + ;specified 'value_robust_contributions_action_feature' value_robust_contributions_features (null) ;{type "string"} ;action feature being predicted whose 
accuracy and prediction affects should be computed by 'value_robust_contributions_features' - value_robust_contributions_action_features (null) + value_robust_contributions_action_feature (null) ;{type "number" exclusive_min 0} ;number of maximum buckets to bin continuous values into, defaults to 30 when unspecified value_robust_contributions_num_buckets 30 @@ -200,10 +200,10 @@ ; context_num_samples: optional, limit on the number of context cases when 'context_condition_precision' is set to 'similar'. ; If null, will be set to k. default is null ; value_robust_accuracy_contributions: optional, none/true/false. - ; Perform a focused computation to determine how all the individual values of specified 'value_robust_contributions_features' affect the accuracy of 'value_robust_contributions_action_features'. + ; Perform a focused computation to determine how all the individual values of specified 'value_robust_contributions_features' affect the accuracy of 'value_robust_contributions_action_feature'. ; Outputs under 'value_robust_contributions' ; value_robust_prediction_contributions: optional, none/true/false. - ; Perform a focused computation to determine how all the individual values of specified 'value_robust_contributions_features' affect the predictions of 'value_robust_contributions_action_features'. + ; Perform a focused computation to determine how all the individual values of specified 'value_robust_contributions_features' affect the predictions of 'value_robust_contributions_action_feature'. 
; Outputs under 'value_robust_contributions' ; ) details (null) @@ -638,7 +638,7 @@ )) ) value_robust_contributions_case_ids - (if (or (get details "value_robust_accuracy_contributions") (get details "value_robust_prediction_contributions)")) + (if (or (get details "value_robust_accuracy_contributions") (get details "value_robust_prediction_contributions")) (call !SampleCases (assoc num (if num_robust_accuracy_contributions_samples @@ -1108,7 +1108,7 @@ weight_feature weight_feature hyperparam_map custom_hyperparam_map context_condition_filter_query context_condition_filter_query - action_feature value_robust_contributions_action_features + action_feature value_robust_contributions_action_feature value_robust_contributions_features value_robust_contributions_features max_num_buckets value_robust_contributions_num_buckets valid_weight_feature valid_weight_feature diff --git a/howso/residuals.amlg b/howso/residuals.amlg index 36fcce3a9..b8cbcd65f 100644 --- a/howso/residuals.amlg +++ b/howso/residuals.amlg @@ -188,27 +188,45 @@ ; i.e.: { "residual_map" : { "featureA" : 3.4... } } ; using_shared_deviations: boolean, default (null). 
If true, will only use parent features and expand according to the shared deviations groups #!ExpandResidualValuesToUncertainty - (set - feature_residuals_map - "residual_map" - (let (assoc - temp_residual_map - (map + (declare + (assoc + has_shared_deviations (and using_shared_deviations (size !sharedDeviationsMap)) + ) + + ;check if any feature residuals need to be modified + (declare (assoc + need_modification_residuals_map + (filter (map (lambda ;inactive features are bound to min residual (if (contains_index !inactiveFeaturesMap (current_index)) (get !cachedFeatureMinResidualMap (current_index)) - ;set upper bound to nominals to be max nominal deviation, accounting for imbalanced classes + ;if residual is too small, replace it with the minimum allowed + (< + (current_value) + (max + (get !cachedFeatureMinResidualMap (current_index)) + (get !userSpecifiedFeatureErrorsMap (current_index)) + ) + ) + (max + (get !cachedFeatureMinResidualMap (current_index)) + (get !userSpecifiedFeatureErrorsMap (current_index)) + ) + + (= (null) (current_value)) + (max (get !cachedFeatureMinResidualMap (current_index)) (get !userSpecifiedFeatureErrorsMap (current_index)) ) + + ;else nominal may need to set upper bound, accounting for imbalanced classes (contains_index !nominalsMap (current_index)) (let (assoc ;map of class -> count feature_grouping - (if (size !sharedDeviationsMap) + (if has_shared_deviations (call !GetSharedDeviationGrouping (assoc feature_group_to_retrieve (current_index 2))) ) - current_feature (current_index 1) ) (declare (assoc class_counts_map @@ -229,69 +247,47 @@ ;list of each feature's class count assocs (map (lambda - (compute_on_contained_entities - (query_value_masses (current_value) (null)) - ) + (compute_on_contained_entities (query_value_masses (current_value) (null)) ) ) feature_grouping ) ) - - (compute_on_contained_entities - (query_value_masses current_feature (null)) - ) + ;else a map of class -> count + (compute_on_contained_entities 
(query_value_masses (current_index 1) (null)) ) ) - )) - ;do not max cap nominal deviations if less than 2 classes have been trained - (if (<= (size class_counts_map) 1) - (max - (get !cachedFeatureMinResidualMap (current_index)) - (current_value) - (get !userSpecifiedFeatureErrorsMap (current_index)) - ) + )) - ;else cap the max nominal deviation + ;only need to cap max nominal deviations if 2 or more classes have been trained + (if (> (size class_counts_map) 1) (let - (assoc total_count (apply "+" (values class_counts_map)) ) - - (min + (assoc + total_count (apply "+" (values class_counts_map)) + ) + (declare (assoc ;nominal max deviation is the sum of: each class's probability multiplied by probability of getting it wrong - (apply "+" - (map - (lambda (let - (assoc class_prob (* (/ (current_value 1) total_count)) ) - (* class_prob (- 1 class_prob)) - )) - (values class_counts_map) + nominal_max_deviation + (apply "+" + (map + (lambda (let + (assoc class_prob (/ (current_value 1) total_count) ) + (* class_prob (- 1 class_prob)) + )) + (values class_counts_map) + ) ) - ) - - (max - (get !cachedFeatureMinResidualMap (current_index)) - (current_value) - (get !userSpecifiedFeatureErrorsMap (current_index)) - ) + )) + (if (> (current_value) nominal_max_deviation) + nominal_max_deviation ) ) ) ) - - ;else continuous value don't have upper bounds, set the lower bound - (max - (get !cachedFeatureMinResidualMap (current_index)) - (current_value) - (get !userSpecifiedFeatureErrorsMap (current_index)) - ) ) ) - ;if using shared deviations, map over a reduced residuals map with only features that are not ; in a shared deviations group or are the primary keys of a shared deviations group - (if (and - using_shared_deviations - (size !sharedDeviationsMap) - ) + (if has_shared_deviations (remove (get feature_residuals_map "residual_map") !sharedDeviationsNonPrimaryFeatures @@ -299,19 +295,51 @@ (get feature_residuals_map "residual_map") ) - ) + )) + )) + + (if 
has_shared_deviations + (set + feature_residuals_map + "residual_map" + (call !ExpandForSharedDeviations (assoc + compressed_values + (if (size need_modification_residuals_map) + ;iterate over the residual map and use the values from need_modification_residuals_map where those values are defined + (map + (lambda + (if (!= (null) (last (current_value))) (last (current_value)) (first (current_value)) ) + ) + (remove (get feature_residuals_map "residual_map") !sharedDeviationsNonPrimaryFeatures) + need_modification_residuals_map + ) + + (remove (get feature_residuals_map "residual_map") !sharedDeviationsNonPrimaryFeatures) + ) + )) ) - (if (and - using_shared_deviations - (size !sharedDeviationsMap) + ;else output the modified residuals + (size need_modification_residuals_map) + (set + feature_residuals_map + "residual_map" + ;iterate over the residual map and use the values from need_modification_residuals_map where those values are defined + (map + (lambda + (if (!= (null) (last (current_value))) (last (current_value)) (first (current_value)) ) + ) + (get feature_residuals_map "residual_map") + need_modification_residuals_map ) - (call !ExpandForSharedDeviations (assoc compressed_values temp_residual_map)) - temp_residual_map ) + + ;else output unmodified residuals + feature_residuals_map ) ) + ;helper method to sample cases from the dataset, outputs a list of case ids ;if there are more cases than the sample size, randomly select that many cases, by default cases are in random order #!SelectCaseIdsForResiduals diff --git a/howso/train.amlg b/howso/train.amlg index 3d0a10040..b27b3469a 100644 --- a/howso/train.amlg +++ b/howso/train.amlg @@ -1218,8 +1218,8 @@ session (get_value session) session_training_index (if (size ts_ablated_indices_map) - (+ next_trained_index (get ts_ablated_indices_map (+ input_case_index (current_value 1)))) - (+ next_trained_index (current_value 1)) + (+ session_training_index (get ts_ablated_indices_map (+ input_case_index (current_value 
1)))) + (+ session_training_index (current_value 1)) ) )) )) diff --git a/howso/train_utilities.amlg b/howso/train_utilities.amlg index 6bd003d4a..f55891654 100644 --- a/howso/train_utilities.amlg +++ b/howso/train_utilities.amlg @@ -289,14 +289,16 @@ (lambda (let (assoc feature_values - (if encode_features_on_train - (call !ConvertFromInput (assoc - feature_values (unzip (current_value 2) rebalance_feature_indices) - features continuous_rebalance_features - )) + (if (size continuous_rebalance_features) + (if encode_features_on_train + (call !ConvertFromInput (assoc + feature_values (unzip (current_value 2) rebalance_feature_indices) + features continuous_rebalance_features + )) - ;else use feature_values as-is - (unzip (current_value 1) rebalance_feature_indices) + ;else use feature_values as-is + (unzip (current_value 1) rebalance_feature_indices) + ) ) ;default nominal weight to 1 nominal_rebalance_features_weight 1 @@ -337,6 +339,7 @@ (let (assoc nominal_values + (if (size nominal_rebalance_feature_indices) (if encode_features_on_train (call !ConvertFromInput (assoc feature_values (unzip (current_value 2) nominal_rebalance_feature_indices) @@ -346,6 +349,7 @@ ;else use feature_values as-is (unzip (current_value 1) nominal_rebalance_feature_indices) ) + ) ) (assign (assoc diff --git a/howso/types.amlg b/howso/types.amlg index 7523f6938..57b54c996 100644 --- a/howso/types.amlg +++ b/howso/types.amlg @@ -1510,7 +1510,7 @@ description (concat "Perform a focused computation to determine how all the individual values of specified " - "'value_robust_contributions_features' affect the accuracy of 'value_robust_contributions_action_features'." + "'value_robust_contributions_features' affect the accuracy of 'value_robust_contributions_action_feature'." 
) ) value_robust_prediction_contributions @@ -1519,7 +1519,7 @@ description (concat "Perform a focused computation to determine how all the individual values of specified " - "'value_robust_contributions_features' affect the predictions of 'value_robust_contributions_action_features'." + "'value_robust_contributions_features' affect the predictions of 'value_robust_contributions_action_feature'." ) ) ) diff --git a/unit_tests/ut_h_rebalance_features.amlg b/unit_tests/ut_h_rebalance_features.amlg index b43dd5bab..5da442f20 100644 --- a/unit_tests/ut_h_rebalance_features.amlg +++ b/unit_tests/ut_h_rebalance_features.amlg @@ -124,9 +124,9 @@ [ 3 0.75 1 ] ;class 'M' reciprocal is 1/(4+1)=0.2, new total rebalance weight is 2.5333, ;new total mass is 8, thus scalar is 8 / 2.5333 = 3.158, 0.2 * 3.158 = 0.6316 - ;distributing about 0.6316 worth of weight among two cases with a ~ 30/70 split: - [ 4 0.93 1.3 ] - [ 5 1.19 1.7 ] + ;distributing about 0.6316 worth of weight among two cases with a ~ 25/75 split: + [ 4 0.91 1.25 ] + [ 5 1.22 1.75 ] [ 6 1 1 ] ] )) @@ -156,8 +156,8 @@ [ 1 1.5 1 ] [ 2 0.75 1 ] [ 3 0.75 1 ] - [ 4 0.93 1.3 ] - [ 5 1.19 1.7 ] + [ 4 0.91 1.25 ] + [ 5 1.22 1.75 ] [ 6 1 1 ] ;unknown class 'L' value is (0.5+0.25)/2=0.375, new total rebalance weight is 2.908 ;new total mass is 9, thus scalar is 9 / 2.908 = 3.095, 0.375 * 3.095 = 1.16 diff --git a/unit_tests/ut_h_value_contributions.amlg b/unit_tests/ut_h_value_contributions.amlg index 6a6ee6286..3047e6d8d 100644 --- a/unit_tests/ut_h_value_contributions.amlg +++ b/unit_tests/ut_h_value_contributions.amlg @@ -56,7 +56,7 @@ result (call_entity "howso" "react_aggregate" (assoc details { "value_robust_accuracy_contributions" .true} - value_robust_contributions_action_features "score" + value_robust_contributions_action_feature "score" value_robust_contributions_features [ "subject" ] )) )) @@ -67,8 +67,8 @@ " everyone does consistently well in 'math', even the poor performers\n\n" ) (call assert_same (assoc - 
exp [ ["art"] ["math"] ["science"] ["literature"] ["history"] ] - obs (get result ["value_robust_contributions" "feature_values"]) + exp [ ["art"] ["math"] ["science"] ] + obs (trunc (get result ["value_robust_contributions" "feature_values"]) 3) )) (print "Average 'subject' AC matches its robust AC:") @@ -83,7 +83,7 @@ result (call_entity "howso" "react_aggregate" (assoc details { "value_robust_accuracy_contributions" .true} - value_robust_contributions_action_features "score" + value_robust_contributions_action_feature "score" value_robust_contributions_features [ "name" ] )) )) @@ -96,8 +96,12 @@ " 'cora's scores vary widely\n\n" ) (call assert_same (assoc - exp [ ["anne"] ["bill"] ["david"] ["fiona"] ["ed"] ["cora"] ] - obs (get result ["value_robust_contributions" "feature_values"]) + exp [ ["anne"] ["bill"] ["david"] ] + obs (trunc (get result ["value_robust_contributions" "feature_values"]) 3) + )) + (call assert_same (assoc + exp ["cora"] + obs (last (get result ["value_robust_contributions" "feature_values"])) )) (print "Average 'name' AC matches its robust AC:") (call assert_approximate (assoc @@ -117,7 +121,7 @@ "value_robust_accuracy_contributions" .true "value_robust_prediction_contributions" .true } - value_robust_contributions_action_features "score" + value_robust_contributions_action_feature "score" value_robust_contributions_features [ "name" "subject" ] )) )) @@ -129,33 +133,38 @@ " 'ed's scores vary, but are usually consistent within each subject\n\n" ) (call assert_same (assoc - obs (trunc (get result ["value_robust_contributions" "feature_values"]) 3) + obs (trunc (get result ["value_robust_contributions" "feature_values"]) 2) exp [ ["anne" "math"] ["fiona" "science"] - ["anne" "history"] ] )) + + ;top results are fairly consistent + (declare (assoc top_6 (trunc (get result ["value_robust_contributions" "feature_values"]) 6) )) (call assert_same (assoc - obs (trunc (get result ["value_robust_contributions" "feature_values"]) 6) - exp - [ - 
["anne" "math"] - ["fiona" "science"] - ["anne" "history"] - ["bill" "math"] - ["ed" "literature"] - ["ed" "history"] - ] - unordered .true + obs + ;top 6 results consistently contain these 5 combinations + (map + (lambda (contains_value top_6 (current_value)) ) + [ + ["anne" "math"] + ["anne" "history"] + ["bill" "math"] + ["ed" "literature"] + ["fiona" "science"] + ] + ) + exp (range (lambda .true) 1 5 1) )) + (print "'Anne + math' and 'Fiona + science' both have high P.C. because they are so extreme: ") (call assert_approximate (assoc obs (trunc (get result ["value_robust_contributions" "pc_values"]) 2) exp [6 6] - thresh 0.3 + thresh 0.333 )) (print "'Anne + math' has very high directional P.C. because she always does so well, while\n" @@ -164,7 +173,7 @@ (call assert_approximate (assoc obs (trunc (get result ["value_robust_contributions" "pc_directional_values"]) 2) exp [6 -6] - thresh 0.3 + thresh 0.333 )) (call exit_if_failures (assoc msg "A.C. and P.C. for two features.")) @@ -173,7 +182,7 @@ result (call_entity "howso" "react_aggregate" (assoc details { "value_robust_accuracy_contributions" .true} - value_robust_contributions_action_features "score" + value_robust_contributions_action_feature "score" value_robust_contributions_features [ "study_time" ] )) )) @@ -199,11 +208,11 @@ ] )) - (print "Average 'study_time' AC matches its robust AC:") + (print "Average 'study_time' AC approximately matches its robust AC:") (call assert_approximate (assoc obs (generalized_mean (get result ["value_robust_contributions" "ac_values"])) exp (get ac_map "study_time") - percent 0.2 + percent 0.25 )) (call exit_if_failures (assoc msg unit_test_name))