diff --git a/howso.amlg b/howso.amlg index facae379b..0c8a174f7 100644 --- a/howso.amlg +++ b/howso.amlg @@ -202,6 +202,7 @@ "attribute_maps" "analysis" "analysis_weights" + "boundary_values" "contributions" "conviction" "custom_codes" diff --git a/howso/boundary_values.amlg b/howso/boundary_values.amlg new file mode 100644 index 000000000..a4b4fcbf6 --- /dev/null +++ b/howso/boundary_values.amlg @@ -0,0 +1,882 @@ +(null + + ;Compute the values for context features that would perturb the prediction action values + #!ComputeBoundaryValues + (let + (assoc + context_feature_index_map (zip context_features (indices context_features)) + action_feature_index_map (zip action_features (indices action_features)) + + boundary_action_condition (get details "boundary_value_action_outcome") + boundary_value_features (get details "boundary_value_context_features") + ) + + (declare (assoc + feature_deviations (get hyperparam_map "featureDeviations") + feature_weights (get hyperparam_map "featureWeights") + + ;base used in exponential searches for boundaries (continuous and numeric ordinals) + exp_search_base 1.1 + )) + + (if (get hyperparam_map "subtraineeName") + (call !UseDynamicDeviationsAndWeights (assoc + context_features context_features + context_values context_values + hyperparam_map hyperparam_map + )) + ) + + ;define eval_function, a piece of code that can be called which will either return: + ; 1. a boolean indicating if `new_action_values` represent new action values that either + ; crossed the boundary or satisfied the user given conditions + ; 2. 
a value representing the ratio of the difference in context & action values + ; (`new_context_values` & `new_action_values`) to the difference in just the context values + (declare (assoc + eval_function + (if boundary_action_condition + ;function that evaluates new_action_values to determine if they meet the user-specified conditions + (lambda + (apply "and" (values + (map + (lambda + (let + (assoc + feat (current_index 1) + condition (current_value 1) + feat_value (get new_action_values (get action_feature_index_map (current_index 1))) + ) + + (if (contains_index !nominalsMap feat) + ;nominals block, users can specify one value that it must match or a list of possible values + (if (~ (list) condition) + (contains_value condition feat_value) + + (= condition feat_value) + ) + + ;continuous, users specify a tuple of the accepted range + (and + (>= feat_value (first condition) ) + (<= feat_value (last condition) ) + ) + ) + ) + + ) + boundary_action_condition + ) + )) + ) + + ;no specified boundary, define a function that will indicate a boundary based on action values + (let + (assoc + num_nominal_features (size (filter (lambda (contains_index !nominalsMap (current_value))) action_features)) + ) + (if (= (size action_features) num_nominal_features) + ;if just nominals, any change will indicate a boundary crossed + (lambda (!= action_values new_action_values)) + + ;else there is some non-nominal thing, so do the ratio of total distance to context distance + (lambda + (/ + ;difference from original context/action to updated context/action + (generalized_distance + feature_weights + !queryDistanceTypeMap + query_feature_attributes_map + feature_deviations + p_parameter + (append context_values action_values) + (append new_context_values new_action_values) + (append context_features action_features) + (null) + ) + ;difference from original context to updated context + (generalized_distance + feature_weights + !queryDistanceTypeMap + query_feature_attributes_map + 
feature_deviations + p_parameter + context_values + new_context_values + context_features + (null) + ) + ) + ) + ) + ) + ) + )) + + ;if well_defined_boundary is true, then eval_function returns a boolean indicating if a/the boundary has + ;been crossed. If false, eval_function returns a ratio of the difference in context/action to difference in context + ;and the boundary_values returned will attempt to maximize this ratio. + (declare (assoc well_defined_boundary (contains_value ["!=" "apply"] (get_type_string eval_function)) )) + + (declare (assoc + boundary_values + ;flow where there must just be some difference in the action_value + (map + (lambda + (let + (assoc + query_feature (current_index 1) + query_feature_index (get context_feature_index_map (current_index 1)) + remaining_context_features (filter (lambda (!= (current_index 2) (current_value))) context_features) + feature_value (get context_values (get context_feature_index_map (current_index 1))) + ) + + (if (contains_index !nominalsMap query_feature) + ;Nominals Block + (call !ComputeNominalBoundaryValues) + + (contains_value !ordinalFeatures query_feature) + ;Ordinals Block + (call !ComputeOrdinalBoundaryValues) + + + (contains_index !editDistanceFeatureTypesMap query_feature) + ;Not supported. 
+ (null) + + ;else assumed continuous + (call !ComputeContinuousBoundaryValues) + ) + ) + ) + (zip boundary_value_features) + ) + )) + + (accum (assoc output (assoc "boundary_values" boundary_values) )) + ) + + ;small helper function for checking a new context value to evaluate if it's prediction crosses a well-defined boundary + ;the new value to test should be "new_feature_value" + ;otherwise uses variables that should be generally available within boundary value finding logic + #!DoesNewValueCrossKnownBoundary + (call eval_function (assoc + new_action_values + (get + (call !ReactDiscriminative (assoc + action_features action_features + context_features context_features + context_values + (set context_values query_feature_index new_feature_value) + skip_encoding (true) + skip_decoding (true) + details {} + return_action_values_only (true) + )) + "action_values" + ) + )) + + ;small helper function for evaluating the boundary ratio of a new context value + ;the new value to test should be "new_feature_value" + ;otherwise uses variables that should be generally available within boundary value finding logic + #!EvaluateNewValueBoundaryRatio + (let + (assoc + new_context_values (set context_values query_feature_index new_feature_value) + ) + (call eval_function (assoc + new_action_values + (get + (call !ReactDiscriminative (assoc + action_features action_features + context_features context_features + context_values new_context_values + details {} + skip_encoding (true) + skip_decoding (true) + return_action_values_only (true) + )) + "action_values" + ) + new_context_values new_context_values + )) + ) + + + ;helper function for ComputeBoundaryValues that computes the boundary values for a feature if it is ordinal + #!ComputeOrdinalBoundaryValues + (let + (assoc + string_to_ordinal_map (get !ordinalStringToOrdinalMap query_feature) + + lesser_boundary_value (null) + greater_boundary_value (null) + ) + + (if string_to_ordinal_map + ;string ordinal + (let + 
;context_values have already been transformed to their integer values at this point + (assoc + ;values below the given value, sorted in increasing distance from given value + lesser_other_values + (sort + (lambda + (- + (current_value 1) + (current_value) + ) + ) + (filter + (lambda (< (current_value) feature_value)) + (values string_to_ordinal_map) + ) + ) + ;values above the given value, sorted in increasing distance from given value + greater_other_values + (sort + (lambda + (- + (current_value) + (current_value 1) + ) + ) + (filter + (lambda (> (current_value) feature_value)) + (values string_to_ordinal_map) + ) + ) + + ) + + (if well_defined_boundary + (seq + ;find the first of the lesser values to cross the boundary (if any) + (while + (and + (= (null) lesser_boundary_value) + (< (current_index) (size lesser_other_values)) + ) + (if (call !DoesNewValueCrossKnownBoundary (assoc new_feature_value (get lesser_other_values (current_index 1)) )) + (assign (assoc lesser_boundary_value (get lesser_other_values (current_index 1)) )) + ) + ) + + ;find the first of the greater values to cross the boundary (if any) + (while + (and + (= (null) greater_boundary_value) + (< (current_index) (size greater_other_values)) + ) + (if (call !DoesNewValueCrossKnownBoundary (assoc new_feature_value (get greater_other_values (current_index 1)) )) + (assign (assoc greater_boundary_value (get greater_other_values (current_index 1)) )) + ) + ) + ) + + ;Need to find the values with the biggest eval_function value; !MaximizeBoundaryRatio returns a [value half_distance] tuple, so keep only the value + (assign (assoc + lesser_boundary_value (first (call !MaximizeBoundaryRatio (assoc candidate_boundary_values lesser_other_values))) + greater_boundary_value (first (call !MaximizeBoundaryRatio (assoc candidate_boundary_values greater_other_values))) + )) + ) + ) + + ;numeric ordinals. 
Leverage the continuous feature code but with strict rounding to the integer level + (let + (assoc + rounding_tuple [(null) 0] + feature_bounds (get !featureBoundsMap query_feature) + feature_residual (get hyperparam_map ["featureResiduals" query_feature]) + ) + + ;get bounds if undefined + (if (= 0 (size feature_bounds)) + (assign (assoc + feature_bounds + (assoc + "min" + (retrieve_from_entity + (contained_entities [ + (query_exists query_feature) + (query_min query_feature 1) + ]) + query_feature + ) + "max" + (retrieve_from_entity + (contained_entities [ + (query_exists query_feature) + (query_max query_feature 1) + ]) + query_feature + ) + ) + )) + ) + + (if (not feature_residual) + (assign (assoc + feature_residual + (compute_on_contained_entities [ + (query_exists query_feature) + (query_min_difference query_feature) + ]) + )) + ) + + (if well_defined_boundary + ;if the boundary is known and need not be found, use this func + (call !ComputeContinuousBoundaryValuesKnownBoundary) + + ;else the boundary needs to be determined and the value selected accordingly + (call !ComputeContinuousBoundaryValuesUnknownBoundary) + ) + ) + + ) + ;return the lesser/greater boundary values as a tuple for the feature + ;these are possibly encoded and need to be decoded + (if (and (not skip_decoding) string_to_ordinal_map) + (call !ConvertToOutput (assoc + features [query_feature query_feature] + feature_values [lesser_boundary_value greater_boundary_value] + )) + + ;else just return the cases as-is + [lesser_boundary_value greater_boundary_value] + ) + ) + + ;helper function for ComputeBoundaryValues that computes boundary values for nominal features + #!ComputeNominalBoundaryValues + (let + (assoc + boundary_value (null) + ) + + ;create a map of the other nominal classes to their distance from the feature value + ;if using an informed SDM, different nominal classes may be more similar to the feature value + (declare (assoc + other_possible_values_distances_map + (map + (lambda 
+ (generalized_distance + feature_weights + !queryDistanceTypeMap + query_feature_attributes_map + feature_deviations + p_parameter + [feature_value] + [(current_index 1)] + (null) + [query_feature] + ) + ) + (compute_on_contained_entities [ + (query_exists query_feature) + (query_not_equals query_feature feature_value) + (query_value_masses query_feature (null) (contains_index !numericNominalFeaturesMap query_feature)) + ]) + ) + )) + + ;this is list of the nominal classes aside from the given value ordered by + ;decreasing similarity either by the SDM deviations or the the values found in + ;similar data + (declare (assoc + candidate_boundary_values + (if (apply "+" (values other_possible_values_distances_map)) + ;all other classes are the same distance away from the feature_value + ;now search the increasing size neighborhoods for similar values + (let + (assoc + case_bandwidth (get hyperparam_map "k") + similar_cases_lists (null) + ) + + (while + (and + ;expand the local data search up to 3 times (or until all classes have been observed) + (< (current_index) 3) + (!= + ;the unique classes of the feature observed in the neighborhood + (size (values (get similar_cases_lists 2) (true))) + (size other_possible_values_distances_map) + ) + ) + (assign (assoc + similar_cases_lists + (compute_on_contained_entities (append + filtering_queries + (query_not_equals query_feature feature_value) + (query_nearest_generalized_distance + case_bandwidth + remaining_context_features + (remove context_values [query_feature_index]) + feature_weights + !queryDistanceTypeMap + query_feature_attributes_map + feature_deviations + p_parameter + 1 ;dt of 1 queries distance in ascending order + (if valid_weight_feature weight_feature (null)) + ;use a fixed random seed to guarantee deterministic behavior for reacts (named "fixed rand seed") + "fixed rand seed" + (null) ;radius + !numericalPrecision + query_feature + ) + )) + )) + (assign (assoc + case_bandwidth (floor (* case_bandwidth 
2.72)) + )) + ) + ;(values X (true)) maintains the order still + (values (last similar_cases_lists) (true)) + ) + + ;otherwise some of the other classes are more similar to the feature value + ;than others, check them in order of decreasing similarity + (sort + (lambda + (- + (get other_possible_values_distances_map (current_value)) + (get other_possible_values_distances_map (current_value 1)) + ) + ) + (indices other_possible_values_distances_map) + ) + ) + + )) + + (if well_defined_boundary + ;Find first class in candidate_boundary_values that crosses the boundary (if any) + (while + (and + (= (null) boundary_value) + (< (current_index) (size candidate_boundary_values)) + ) + (if (call !DoesNewValueCrossKnownBoundary (assoc new_feature_value (get candidate_boundary_values (current_index 1)) )) + (assign (assoc boundary_value (get candidate_boundary_values (current_index 1)) )) + ) + ) + + ;Else need to find the value in candidate_boundary_values that maximizes the ratio + ;of action distance to context/action distance + (assign (assoc + boundary_value + ;only need the value, and not the half distance + (first + ;this can be used as a helper function within boundary values, giving the value of + ;candidate_boundary_values that has the highest value outputted by eval_function, + ;returns a tuple of the value that maximized the boundary ratio and half the distance + ;to its nearest candidate for continuous values + #!MaximizeBoundaryRatio + (let + (assoc + candidate_value_dist_ratios + (map + (lambda + (call !EvaluateNewValueBoundaryRatio (assoc new_feature_value (current_value 1) )) + ) + candidate_boundary_values + ) + ) + + (declare (assoc + index_of_max_boundary_ratio + (first + (sort + (lambda + (- + (get candidate_value_dist_ratios (current_value 1)) + (get candidate_value_dist_ratios (current_value)) + ) + ) + ;reversing the values makes sure the first value with the max value is returned + (reverse (indices candidate_value_dist_ratios)) + ) + ) + )) + + 
(declare (assoc + most_similar_value_index + ;if searching numbers, find the tuple of the nearest, can be left or right + (if (~ 0 (first candidate_boundary_values)) + (if (> + ;difference of value to the right + (abs (- + (get candidate_boundary_values index_of_max_boundary_ratio) + ;fall back to infinity if out of index + (or (get candidate_boundary_values (+ index_of_max_boundary_ratio 1)) .infinity) + )) + ;difference of value to the left + (abs (- + (get candidate_boundary_values index_of_max_boundary_ratio) + (or (get candidate_boundary_values (- index_of_max_boundary_ratio 1)) .infinity) + )) + ) + ;right difference greater, return index to the left + (- index_of_max_boundary_ratio 1) + + ;else right + (+ index_of_max_boundary_ratio 1) + ) + ) + )) + + ;tuple of the value with max boundary ratio and the half the distance to the most similar + ;value in candidate_boundary_values if the feature is continous (used for binary search + ;for continuous features) + [ + (get + candidate_boundary_values + index_of_max_boundary_ratio + ) + (/ + (abs (- + (get candidate_boundary_values index_of_max_boundary_ratio) + (get candidate_boundary_values most_similar_value_index) + )) + 2 + ) + ] + ) + ) + )) + ) + + boundary_value + ) + + ;helper function for ComputeBoundaryValues that find the boundary values for continuous features + #!ComputeContinuousBoundaryValues + (let + (assoc + feature_bounds (get !featureBoundsMap query_feature) + feature_residual (get hyperparam_map ["featureResiduals" query_feature]) + rounding_tuple (get !featureRoundingMap query_feature) + + ;outputs + lesser_boundary_value (null) + greater_boundary_value (null) + ) + + (if (not feature_residual) + (assign (assoc + feature_residual + (compute_on_contained_entities [ + (query_exists query_feature) + (query_min_difference query_feature) + ]) + )) + ) + + ;get bounds if undefined + (if (= 0 (size feature_bounds)) + (assign (assoc + feature_bounds + (assoc + "min" + (retrieve_from_entity + 
(contained_entities [ + (query_exists query_feature) + (query_min query_feature 1) + ]) + query_feature + ) + "max" + (retrieve_from_entity + (contained_entities [ + (query_exists query_feature) + (query_max query_feature 1) + ]) + query_feature + ) + ) + )) + ) + + (if well_defined_boundary + ;if the boundary is known and need not be found, use this func + (call !ComputeContinuousBoundaryValuesKnownBoundary) + + ;else the boundary needs to be determined and the value selected accordingly + (call !ComputeContinuousBoundaryValuesUnknownBoundary) + ) + ;return a tuple of lower boundary value and greater boundary value, both may be (null) + (if (and (not skip_decoding) !hasEncodedFeatures) + (call !ConvertToOutput (assoc + features [query_feature query_feature] + feature_values [lesser_boundary_value greater_boundary_value] + )) + + ;else just return the cases as-is + [lesser_boundary_value greater_boundary_value] + ) + ) + + ;helper function for ComputeBoundaryValues that finds the boundary values for a continuous context (and numeric continuous) + ;feature when the boundary is well defined (a change in a nominal action or a specified threshold for + ;a continuous action, and eval_function returns a boolean value). + ;assigns to lesser_boundary_value and greater_boundary_value + #!ComputeContinuousBoundaryValuesKnownBoundary + (declare + (assoc + ;used for binary search + center (null) + left (null) + right (null) + + ;used for initial exponential search + query_delta (/ feature_residual 2) + ) + + ;check lower side. 
start half a residual below and keep multiplying the distance from the case value by exp_search_base + ;until the boundary condition is true (or go below min bound) + (while + (and + (> (- feature_value query_delta) (get feature_bounds "min")) + (not + (call !DoesNewValueCrossKnownBoundary (assoc new_feature_value (- feature_value query_delta) )) + ) + ) + (assign (assoc query_delta (* exp_search_base query_delta) )) + ) + + + ;if the query_value is still greater than the min bound, then we must have crossed the boundary + (if (> (- feature_value query_delta) (get feature_bounds "min")) + (seq + ;but now binary search between the crossing value (left) and the previous search value (right) + (assign (assoc + left (- feature_value query_delta) + right (- feature_value (/ query_delta exp_search_base)) + )) + (assign (assoc center (/ (+ left right) 2) )) + + ;binary search until within half a residual + (while + (> + (- right center) + (/ feature_residual 2) + ) + + (if (call !DoesNewValueCrossKnownBoundary (assoc new_feature_value center)) + ;boundary still crossed, check right side + (assign (assoc left center )) + + ;else, boundary uncrossed, check left side + (assign (assoc right center )) + ) + (assign (assoc center (/ (+ left right) 2) )) + ) + + (assign (assoc + lesser_boundary_value + (if (contains_value !ordinalFeatures query_feature) + ;ordinal, need to be sure we round in a way that still crosses the boundary + ;prioritize the ceiling version as we are searching downward + (if (call !DoesNewValueCrossKnownBoundary (assoc new_feature_value (ceil center) )) + (ceil center) + + ;else use the floor + (floor center) + ) + + ;else standard continuous, just round normally + (if rounding_tuple + (apply "round" (append center rounding_tuple)) + center + ) + ) + )) + ) + ) + + ;check upper side. 
start half a residual above and keep multiplying the distance from the case value by exp_search_base + ;until the boundary condition is true (or go beyond max bound) + (assign (assoc query_delta (/ feature_residual 2) )) + (while + (and + (< (+ feature_value query_delta) (get feature_bounds "max")) + (not + (call !DoesNewValueCrossKnownBoundary (assoc new_feature_value (+ feature_value query_delta) )) + ) + ) + (assign (assoc query_delta (* exp_search_base query_delta) )) + ) + + + ;if the query_value is still less than the max bound, then we must have crossed the boundary + (if (< (+ feature_value query_delta) (get feature_bounds "max")) + (seq + ;but now binary search between the previous search value (left) and the crossing value (right) + (assign (assoc + left (+ feature_value (/ query_delta exp_search_base)) + right (+ feature_value query_delta) + )) + (assign (assoc center (/ (+ left right) 2) )) + + ;binary search until within half a residual + (while + (> + (- right center) + (/ feature_residual 2) + ) + + (if (call !DoesNewValueCrossKnownBoundary (assoc new_feature_value center)) + ;boundary still crossed, check left side + (assign (assoc right center )) + + ;else, boundary uncrossed, check right side + (assign (assoc left center )) + ) + (assign (assoc center (/ (+ left right) 2) )) + ) + + (assign (assoc + greater_boundary_value + (if (contains_value !ordinalFeatures query_feature) + ;ordinal, need to be sure we round in a way that still crosses the boundary + ;prioritize the floor version as we are searching upward + (if (call !DoesNewValueCrossKnownBoundary (assoc new_feature_value (floor center) )) + (floor center) + + ;else use the ceiling + (ceil center) + ) + + ;else standard continuous, just round normally + (if rounding_tuple + (apply "round" (append center rounding_tuple)) + center + ) + ) + )) + ) + ) + ) + + ;helper function for !ComputeBoundaryValues that computes boundary values for + ;continuous context features when the boundary needs to be "found". 
+ ;in this context, eval_function should return a value indicating the ratio of the distance + ;of the vector of change in contexts/action to the distance of the vector of change in context + ;by maximizing this ratio, we are attempting to find the boundary where the change in contexts + ;creates the largest change in action + #!ComputeContinuousBoundaryValuesUnknownBoundary + (declare + (assoc + ;first must check a variety of values lesser than the feature value, do so exponentially. + ;try the residual scaled by exp_search_base to the powers -1, 0, 1, and so on below, up until the bound + ;would be met + lesser_side_candidate_values + (map + (lambda + (- feature_value (* feature_residual (pow exp_search_base (current_value)))) + ) + ;range from -1 to max exponent of exp_search_base which multiplied by feature residual is less than the + ;difference between the feature value and the minimum bound + (range + -1 + (floor (log + (/ (- feature_value (get feature_bounds "min")) feature_residual) + exp_search_base + )) + 1 + ) + ) + ) + + ;only search for boundary value if far enough from minimum bound + (if (size lesser_side_candidate_values) + ;take the value where the BoundaryRatio is at its greatest + (let + (assoc + maximize_tuple (call !MaximizeBoundaryRatio (assoc candidate_boundary_values lesser_side_candidate_values)) + ) + (declare (assoc + query_value (first maximize_tuple) + query_delta (last maximize_tuple) + )) + ;to find the maxima, start at the selected value and check both sides + ; then do the same on the side that was greater iterating to smaller and smaller regions + #!BinarySearchForMaxima + (while (> query_delta (/ feature_residual 2)) + (if + (> + ;boundary ratio of lesser value + (call !EvaluateNewValueBoundaryRatio (assoc new_feature_value (- query_value query_delta))) + + ;boundary ratio of greater value + (call !EvaluateNewValueBoundaryRatio (assoc new_feature_value (+ query_value query_delta))) + ) + + ;left side had greater value, iterate 
there + (assign (assoc query_value (- query_value (/ query_delta 2)) )) + + ;right side had greater value, iterate there + (assign (assoc query_value (+ query_value (/ query_delta 2)) )) + ) + (assign (assoc query_delta (/ query_delta 2) )) + ) + + (assign (assoc + lesser_boundary_value + (if rounding_tuple + (apply "round" (append query_value rounding_tuple)) + query_value + ) + )) + ) + ) + + ;now repeat the process for values above + (declare (assoc + greater_side_candidate_values + (map + (lambda + (+ feature_value (* feature_residual (pow exp_search_base (current_value)))) + ) + ;range from -1 to max exponent of exp_search_base which multiplied by feature residual is less than the + ;difference between the maximum bound and the feature value + (range + -1 + (floor (log + (/ (- (get feature_bounds "max") feature_value) feature_residual) + exp_search_base + )) + 1 + ) + ) + )) + + ;only search for boundary value if far enough from maximum bound + (if (size greater_side_candidate_values) + ;take the value where the BoundaryRatio is at its greatest + (let + (assoc + maximize_tuple (call !MaximizeBoundaryRatio (assoc candidate_boundary_values greater_side_candidate_values)) + ) + (declare (assoc + query_value (first maximize_tuple) + query_delta (last maximize_tuple) + )) + ;to find the maxima, start at the selected value and check both sides + ; then do the same on the side that was greater iterating to smaller and smaller regions + (call !BinarySearchForMaxima) + + (assign (assoc + greater_boundary_value + (if rounding_tuple + (apply "round" (append query_value rounding_tuple)) + query_value + ) + )) + ) + ) + ) +) \ No newline at end of file diff --git a/howso/details.amlg b/howso/details.amlg index 990bffe80..57c8dc8fd 100644 --- a/howso/details.amlg +++ b/howso/details.amlg @@ -267,6 +267,10 @@ (call !ComputeBoundaryCases) ) + (if (get details "boundary_value_context_features") + (call !ComputeBoundaryValues) + ) + ;a dict of feature 
-> dict of local_min/local_max and input_case_value for features whose context values are outside the locals' min-max (if (get details "outlying_feature_values") (call !ComputeOutlyingFeatureValues) diff --git a/howso/return_typing.amlg b/howso/return_typing.amlg index 7753f14ce..2a54df259 100644 --- a/howso/return_typing.amlg +++ b/howso/return_typing.amlg @@ -25,6 +25,17 @@ description "A list of lists of predicted values for each case." required (true) } + "boundary_values" + { + type "list" + values {type "assoc" values "any"} + description + (concat + "A list of maps of feature names to the boundary values computed for them. For continuous and ordinal features, a tuple of values " + "will be returned indicating the values of a boundary above and below the given value. For nominals, only the boundary value " + "itself is returned. A missing value indicates that there was no boundary found." + ) + } "categorical_action_probabilities" { type "list" diff --git a/howso/typing.amlg b/howso/typing.amlg index ec35b44d4..ad2c8db93 100644 --- a/howso/typing.amlg +++ b/howso/typing.amlg @@ -840,6 +840,33 @@ "to determine the boundary instead." ) ) + boundary_value_context_features + (assoc + type "list" + values "string" + description + (concat + "When specified, outputs a map of each specified feature to possible boundary values under \"boundary_values\". " + "If 'boundary_value_action_outcome' is " + "unspecified, then the returned values are values where the ratio of change in action to total change is maximized. " + "If 'boundary_value_action_outcome' is specified, then the boundary values represent the closest found values for " + "each context feature that shift the action to fulfill the condition." 
+ ) + ) + boundary_value_action_outcome + (assoc + ref "Condition" + description + (concat + "The condition map to define the boundary within which the action of a prediction will be shifted to when the returned " + " boundary values are used as context values instead of those given. The dictionary keys " + "are the feature name and values are one of:\n" + "- None\n" + "- A value, must match exactly.\n" + "- An array of two numeric values, specifying an inclusive range. Only applicable to continuous and numeric ordinal features.\n" + "- An array of string values, must match any of these values exactly. Only applicable to nominal and string ordinal features.\n" + ) + ) num_boundary_cases (assoc type "number" diff --git a/unit_tests/ut_h_boundary_values.amlg b/unit_tests/ut_h_boundary_values.amlg new file mode 100644 index 000000000..0c0b1a1d1 --- /dev/null +++ b/unit_tests/ut_h_boundary_values.amlg @@ -0,0 +1,207 @@ +(seq + #unit_test (direct_assign_to_entities (assoc unit_test (load "unit_test.amlg"))) + (call (load "unit_test_howso.amlg") (assoc name "ut_h_boundary_values.amlg")) + + (call_entity "howso" "set_feature_attributes" (assoc + feature_attributes + (assoc + "ord_string" (assoc "type" "ordinal" "bounds" (assoc "allowed" (list "low" "high1" "high2"))) + "ord" (assoc "type" "ordinal" "data_type" "number") + "nom" (assoc "type" "nominal") + ) + )) + + (declare (assoc + result (null) + features (list "f1" "f2" "ord" "ord_string" "nom") + data + ;the concept is that the data has a clear boundary across the middle here + (list + ;"f1" "f2" "ord" "ord_string" "nom" + [5 10 2 "low" "A"] ;section 1 starts here + [6 9 3 "low" "B"] + [7 8 2 "low" "C"] + [8 7 3 "low" "A"] + [9 6 2 "low" "B"] + [10 5 3 "low" "C"] + [11 21 6 "high1" "D"] ;section 2 starts here + [12 22 7 "high2" "E"] + [13 23 6 "high1" "F"] + [14 24 7 "high2" "D"] + [15 25 6 "high1" "E"] + [16 26 7 "high2" "F"] + [17 27 6 "high1" "D"] + [18 28 7 "high2" "E"] + ) + )) + + (call_entity "howso" "train" 
(assoc + cases data + features features + session "session" + )) + + (call_entity "howso" "analyze" (assoc + k_values [3] + p_values [0.1] + use_deviations (false) + )) + + ;use f1 to predict f2 + (assign (assoc + result + (call_entity "howso" "react" (assoc + context_features ["f1"] + context_values [[13]] + action_features ["f2"] + details + { + "boundary_value_context_features" ["f1"] + "boundary_value_action_outcome" + { + ;at what boundary value does f2 start to be predicted between 0-10? + "f2" [0 10] + } + } + )) + )) + (call keep_result_payload) + + (print "Continuous context to continuous action, known boundary: \n") + ;This should be just below 10 because that's when the influential cases should all start + ;to belong to the other side + (print "Lesser Bound: ") + (call assert_approximate (assoc + exp 9.5 + obs (get result ["boundary_values" 0 "f1" 0]) + thresh 1.0 + )) + + (print "Greater Bound: ") + ;No value greater than 13 will bring down the value of f2 to below 10 + (call assert_null (assoc + obs (get result ["boundary_values" 0 "f1" 1]) + )) + + ;use ord to predict nom + (assign (assoc + result + (call_entity "howso" "react" (assoc + context_features ["ord"] + context_values [[7]] + action_features ["nom"] + details + { + "boundary_value_context_features" ["ord"] + "boundary_value_action_outcome" + { + ;at what boundary value does nom start to be predicted as A, B, or C? 
+ "nom" ["A" "B" "C"] + } + } + )) + )) + (call keep_result_payload) + + (print "Numeric ordinal context to nominal action, known boundary: \n") + ;ordinals 2/3 will have the desired nominal classes, so I expect a boundary of 4 + (print "Lesser Bound: ") + (call assert_approximate (assoc + exp 4 + obs (get result ["boundary_values" 0 "ord" 0]) + thresh 0.5 + )) + + (print "Greater Bound: ") + ;There are no ordinal values above 7 + (call assert_null (assoc + obs (get result ["boundary_values" 0 "ord" 1]) + )) + + ;using ord_string to predict f2, known boundary + (assign (assoc + result + (call_entity "howso" "react" (assoc + context_features ["ord_string"] + context_values [["high1"]] + action_features ["f2"] + details + { + "boundary_value_context_features" ["ord_string"] + "boundary_value_action_outcome" + { + ;at what boundary value does f2 start to be predicted 0-10? + "f2" [0 10] + } + } + )) + )) + (call keep_result_payload) + + (print "ordinal string context to continuous action, known boundary: \n") + ;ord_string "low", will have all the f2 values 10 and below + (print "Lesser Bound: ") + (call assert_same (assoc + exp "low" + obs (get result ["boundary_values" 0 "ord_string" 0]) + )) + + (print "Greater Bound: ") + ;"high2" will not cross the boundary, so null + (call assert_null (assoc + obs (get result ["boundary_values" 0 "ord_string" 1]) + )) + + ;using nom to predict f2 and ord, unknown boundary + (assign (assoc + result + (call_entity "howso" "react" (assoc + context_features ["nom"] + context_values [["B"]] + action_features ["f2" "ord"] + details + { + "boundary_value_context_features" ["nom"] + } + )) + )) + (call keep_result_payload) + + (print "Nominal context to predict 2 action features, unknown boundary: ") + ;"A" and "C" should return fairly similar predictions while any of D, E, and F, cross the data boundary + (call assert_true (assoc + obs (contains_value ["D" "E" "F"] (get result ["boundary_values" 0 "nom"])) + )) + + + ;using f1 to predict f2, unknown 
boundary + (assign (assoc + result + (call_entity "howso" "react" (assoc + context_features ["f1"] + context_values [[6]] + action_features ["f2"] + details + { + "boundary_value_context_features" ["f1"] + } + )) + )) + (call keep_result_payload) + + (print "continuous context to continuous action, unknown boundary: \n") + (print "Lesser Bound: ") + ;the value is too close to the lower bound, so null + (call assert_null (assoc + obs (get result ["boundary_values" 0 "f1" 0]) + )) + + (print "Greater Bound: ") + (call assert_approximate (assoc + exp 11 + obs (get result ["boundary_values" 0 "f1" 1]) + thresh 2 + )) + + (call exit_if_failures (assoc msg unit_test_name)) +) \ No newline at end of file diff --git a/unit_tests/ut_howso.amlg b/unit_tests/ut_howso.amlg index d93cfe418..caf3b9334 100644 --- a/unit_tests/ut_howso.amlg +++ b/unit_tests/ut_howso.amlg @@ -14,6 +14,7 @@ "ut_h_train_react.amlg" "ut_h_react_audit.amlg" "ut_h_basic_ablation.amlg" + "ut_h_boundary_values.amlg" "ut_h_reduce_data.amlg" "ut_h_case_removal.amlg" "ut_h_ordinal.amlg"