diff --git a/docs/language.js b/docs/language.js index 3214efcd..b0527b47 100644 --- a/docs/language.js +++ b/docs/language.js @@ -1628,7 +1628,7 @@ var data = [ "parameter" : "query_within_generalized_distance number max_distance list axis_labels list|string axis_values_or_entity_id [number p_value] [list|assoc|assoc of assoc weights] [list|assoc distance_types] [list|assoc attributes] [list|assoc deviations] [string weights_selection_feature] [string|number distance_transform] [string entity_weight_label_name] [number random_seed] [string radius_label] [string numerical_precision] [* output_sorted_list]", "output" : "query", "new value" : "partial", - "description" : "When used as a query argument, selects entities which represent a point within a certain generalized norm to a given point. axis_labels specifies the names of the coordinate axes (as labels on the target entity), and axis_values_or_entity_id specifies the corresponding values for the point to test from or if a string the entity to collect the labels from. The parameter p_value is the generalized norm parameter, where the value of 1 is probability space and Manhattan distance, the default, 2 being Euclidean distance, etc. The weights parameter specifies how to weight the different dimensions. If weights is a list, each value maps to its respective element in the vectors. If weights is null, then it will assume that the weights are 1. If weights is an assoc, then the parameter value_names will select the weights from the assoc. If weights is an assoc of assocs, then the parameter weights_selection_feature will select which set of weights to use and redistribute and normalize any probability masses for unused features to features that are used. The parameter distance_types is either a list strings or an assoc of strings indicating the type of distance for each feature. Allowed values are \"nominal_bool\", \"nominal_number\", \"nominal_string\", \"nominal_code\", \"continuous_number\", \"continuous_number_cyclic\", \"continuous_string\", \"continuous_code_no_recursive_matching\", and \"continuous_code\". Nominals evaluate whether the two values are the same and continuous evaluates the difference between the two values. The numeric, string, or code modifier specifies how the difference is measured, and cyclic means it is a difference that wraps around. For attributes, the particular distance_types specifies what is expected. For a nominal distance_type, a number indicates the nominal count, whereas null will infer from the values available. For continuous, a null means unbounded where distance for a null will be computed automatically from the relevant data; a single number indicates the difference between a value and a null, a specified uncertainty. Cyclic requires either a single value or a list of two values; a list of two values indicates that the first value, the lower bound, will wrap around to the upper bound, the second value specified; if only a single number is provided instead of a list, then it will assume that number for the upper bound and 0 for the lower bound. For the string distance type, the value specified can be a number indicating the maximum possible string length, inferred if null is provided. For code, the value specified can be a number indicating the maximum number of nodes in the code (including labels), inferred if null is provided. Deviations contains numbers that are used during the distance calculation, per-element, prior to exponentiation. Specifying null as deviations is equivalent to setting each deviation to 0. max_distance is the maximum distance allowed. The optional radius_label parameter represents the label name of the radius of the entity (if the radius is within the distance, the entity is selected). The optional numerical_precision represents one of three values: \"precise\", which computes every distance with high numerical precision, \"fast\", which computes every distance with lower but faster numerical precision, and \"recompute_precise\", which computes distances quickly with lower precision but then recomputes any distance values that will be returned with higher precision. If called last with compute_on_contained_entities, then it returns an assoc of the entity ids with their distances. A transform will be applied to these distances based on distance_transform. If distance_transform is \"surprisal\" then distances will be calculated as surprisals, and weights will not be applied to the values. If distance_transform is \"surprisal_to_prob\" then distances will be calculated as surprisals and will be transformed back into probabilities for aggregating, and then transformed back to surprisals. If distance_transform is a number or omitted, which will default to 1.0, then it will be treated as a distance weight exponent, and will be applied to each distance as distance^distance_weight_exponent, only using entity weights for nonpositive values of distance_transform. If entity_weight_label_name is specified, it will multiply the resulting value for each entity (after distance_weight_exponent, etc. have been applied) by the value in the label of entity_weight_label_name. If output_sorted_list is not specified or is false, then it will return an assoc of entity string id as the key with the distance as the value; if output_sorted_list is true, then it will return a list of lists, where the first list is the entity ids and the second list contains the corresponding distances, where both lists are in sorted order starting with the closest or most important (based on whether distance_weight_exponent is positive or negative respectively). If output_sorted_list is a string, then it will additionally return a list where the values correspond to the values of the labels for each respective entity. If output_sorted_list is a list of strings, then it will additionally return a list of values for each of the label values for each respective entity.", + "description" : "When used as a query argument, selects entities which represent a point within a certain generalized norm to a given point. axis_labels specifies the names of the coordinate axes (as labels on the target entity), and axis_values_or_entity_id specifies the corresponding values for the point to test from or if a string the entity to collect the labels from. The parameter p_value is the generalized norm parameter, where the value of 1 is probability space and Manhattan distance, the default, 2 being Euclidean distance, etc. The weights parameter specifies how to weight the different dimensions. If weights is a list, each value maps to its respective element in the vectors. If weights is null, then it will assume the weights to be 1 / number of features if distance_transform is probability space or surprisal space, or otherwise 1. If weights is an assoc, then the parameter value_names will select the weights from the assoc. If weights is an assoc of assocs, then the parameter weights_selection_feature will select which set of weights to use and redistribute and normalize any probability masses for unused features to features that are used. The parameter distance_types is either a list strings or an assoc of strings indicating the type of distance for each feature. Allowed values are \"nominal_bool\", \"nominal_number\", \"nominal_string\", \"nominal_code\", \"continuous_number\", \"continuous_number_cyclic\", \"continuous_string\", \"continuous_code_no_recursive_matching\", and \"continuous_code\". Nominals evaluate whether the two values are the same and continuous evaluates the difference between the two values. The numeric, string, or code modifier specifies how the difference is measured, and cyclic means it is a difference that wraps around. For attributes, the particular distance_types specifies what is expected. For a nominal distance_type, a number indicates the nominal count, whereas null will infer from the values available. For continuous, a null means unbounded where distance for a null will be computed automatically from the relevant data; a single number indicates the difference between a value and a null, a specified uncertainty. Cyclic requires either a single value or a list of two values; a list of two values indicates that the first value, the lower bound, will wrap around to the upper bound, the second value specified; if only a single number is provided instead of a list, then it will assume that number for the upper bound and 0 for the lower bound. For the string distance type, the value specified can be a number indicating the maximum possible string length, inferred if null is provided. For code, the value specified can be a number indicating the maximum number of nodes in the code (including labels), inferred if null is provided. Deviations contains numbers that are used during the distance calculation, per-element, prior to exponentiation. Specifying null as deviations is equivalent to setting each deviation to 0. max_distance is the maximum distance allowed. The optional radius_label parameter represents the label name of the radius of the entity (if the radius is within the distance, the entity is selected). The optional numerical_precision represents one of three values: \"precise\", which computes every distance with high numerical precision, \"fast\", which computes every distance with lower but faster numerical precision, and \"recompute_precise\", which computes distances quickly with lower precision but then recomputes any distance values that will be returned with higher precision. If called last with compute_on_contained_entities, then it returns an assoc of the entity ids with their distances. A transform will be applied to these distances based on distance_transform. If distance_transform is \"surprisal\" then distances will be calculated as surprisals, and weights will not be applied to the values. If distance_transform is \"surprisal_to_prob\" then distances will be calculated as surprisals and will be transformed back into probabilities for aggregating, and then transformed back to surprisals. If distance_transform is a number or omitted, which will default to 1.0, then it will be treated as a distance weight exponent, and will be applied to each distance as distance^distance_weight_exponent, only using entity weights for nonpositive values of distance_transform. If entity_weight_label_name is specified, it will multiply the resulting value for each entity (after distance_weight_exponent, etc. have been applied) by the value in the label of entity_weight_label_name. If output_sorted_list is not specified or is false, then it will return an assoc of entity string id as the key with the distance as the value; if output_sorted_list is true, then it will return a list of lists, where the first list is the entity ids and the second list contains the corresponding distances, where both lists are in sorted order starting with the closest or most important (based on whether distance_weight_exponent is positive or negative respectively). If output_sorted_list is a string, then it will additionally return a list where the values correspond to the values of the labels for each respective entity. If output_sorted_list is a list of strings, then it will additionally return a list of values for each of the label values for each respective entity.", "example" : "(contained_entities \"TestContainerExec\" (list (query_within_generalized_distance 3 (list \"x\" \"y\") (list 0.0 0.0) 0.01 (list 2 1) (list \"nominal_number\" \"continuous_number_cyclic\") (list 1 360) (null) (null) 1 (null) \"random seed 1234\" \"radius\") ) )" }, @@ -1636,7 +1636,7 @@ var data = [ "parameter" : "query_nearest_generalized_distance list|number entities_returned list axis_labels list|string axis_values_or_entity_id [number p_value] [list|assoc|assoc of assoc weights] [list|assoc distance_types] [list|assoc attributes] [list|assoc deviations] [string weights_selection_feature] [string|number distance_transform] [string entity_weight_label_name] [number random_seed] [string radius_label] [string numerical_precision] [* output_sorted_list]", "output" : "query", "new value" : "partial", - "description" : "When used as a query argument, selects the closest entities which represent a point within a certain generalized norm to a given point. axis_labels specifies the names of the coordinate axes (as labels on the target entity), and axis_values_or_entity_id specifies the corresponding values for the point to test from or if a string the entity to collect the labels from. The parameter p_value is the generalized norm parameter, where the value of 1 is probability space and Manhattan distance, the default, 2 being Euclidean distance, etc. The weights parameter specifies how to weight the different dimensions. If weights is a list, each value maps to its respective element in the vectors. If weights is null, then it will assume that the weights are 1. If weights is an assoc, then the parameter value_names will select the weights from the assoc. If weights is an assoc of assocs, then the parameter weights_selection_feature will select which set of weights to use and redistribute and normalize any probability masses for unused features to features that are used. The parameter distance_types is either a list strings or an assoc of strings indicating the type of distance for each feature. Allowed values are \"nominal_bool\", \"nominal_number\", \"nominal_string\", \"nominal_code\", \"continuous_number\", \"continuous_number_cyclic\", \"continuous_string\", \"continuous_code_no_recursive_matching\", and \"continuous_code\". Nominals evaluate whether the two values are the same and continuous evaluates the difference between the two values. The numeric, string, or code modifier specifies how the difference is measured, and cyclic means it is a difference that wraps around. \nFor attributes, the particular distance_types specifies what particular attributes are expected. For a nominal distance_type, a number indicates the nominal count, whereas null will infer from the values given. Cyclic requires a single value, which is the upper bound of the difference for the cycle range (e.g., if the value is 360, then the supremum difference between two values will be 360, leading 1 and 359 to have a difference of 2).\n Deviations are used during distance calculation to specify uncertainty per-element, the minimum difference between two values prior to exponentiation. Specifying null as a deviation is equivalent to setting each deviation to 0, unless distance_transform is \"surprisal_to_prob\", in which case it will attempt to infer a deviation. Each deviation for each feature can be a single value or a list. If it is a single value, that value is used as the deviation and differences and deviations for null values will automatically computed from the data based on the maximum difference. If a deviation is provided as a list, then the first value is the deviation, the second value is the difference to use when one of the values being compared is null, and the third value is the difference to use when both of the values are null. If the third value is omitted, it will use the second value for both. If both of the null values are omitted, then it will compute the maximum difference and use that for both. For nominal types, the value for each feature can be a numeric deviation, an assoc, or a list. If the value is an assoc it specifies deviation information, where each key of the assoc is the nominal value, and each value of the assoc can be a numeric deviation value, a list, or an assoc, with the list specifying either an assoc followed optionally by the default deviation. This inner assoc, regardless of whether it is in a list, maps the value to each actual value's deviation. The parameter entities_returned specifies either the number of entities to return, or is a list. If entities_returned is a list, the first element of the list specifies the minimum incremental probability or percent of mass that the next largest entity would comprise (e.g., 0.05 would return at most 20 entities if they were all equal in percent of mass), and the other elements are optional. The second element is the minimum number of entities to return, the third element is the maximum number of entities to return, and the fourth indicates the number of additional entities to include after any of the aforementioned thresholds (defaulting to zero). If there is disagreement among the constraints for entities_returned, the constraint yielding the fewest entities will govern the number of entities returned. The optional radius_label parameter represents the label name of the radius of the entity (if the radius is within the distance, the entity is selected). The optional numerical_precision represents one of three values: \"precise\", which computes every distance with high numerical precision, \"fast\", which computes every distance with lower but faster numerical precision, and \"recompute_precise\", which computes distances quickly with lower precision but then recomputes any distance values that will be returned with higher precision. If called last with compute_on_contained_entities, then it returns an assoc of the entity ids with their distances. A transform will be applied to these distances based on distance_transform. If distance_transform is \"surprisal\" then distances will be calculated as surprisals, and weights will not be applied to the values. If distance_transform is \"surprisal_to_prob\" then distances will be calculated as surprisals and will be transformed back into probabilities for aggregating, and then transformed back to surprisals. If distance_transform is a number or omitted, which will default to 1.0, then it will be treated as a distance weight exponent, and will be applied to each distance as distance^distance_weight_exponent, only using entity weights for nonpositive values of distance_transform. If entity_weight_label_name is specified, it will multiply the resulting value for each entity (after distance_weight_exponent, etc. have been applied) by the value in the label of entity_weight_label_name. If output_sorted_list is not specified or is false, then it will return an assoc of entity string id as the key with the distance as the value; if output_sorted_list is true, then it will return a list of lists, where the first list is the entity ids and the second list contains the corresponding distances, where both lists are in sorted order starting with the closest or most important (based on whether distance_weight_exponent is positive or negative respectively). If output_sorted_list is a string, then it will additionally return a list where the values correspond to the values of the labels for each respective entity. If output_sorted_list is a string, then it will additionally return a list where the values correspond to the values of the labels for each respective entity. If output_sorted_list is a list of strings, then it will additionally return a list of values for each of the label values for each respective entity.", + "description" : "When used as a query argument, selects the closest entities which represent a point within a certain generalized norm to a given point. axis_labels specifies the names of the coordinate axes (as labels on the target entity), and axis_values_or_entity_id specifies the corresponding values for the point to test from or if a string the entity to collect the labels from. The parameter p_value is the generalized norm parameter, where the value of 1 is probability space and Manhattan distance, the default, 2 being Euclidean distance, etc. The weights parameter specifies how to weight the different dimensions. If weights is a list, each value maps to its respective element in the vectors. If weights is null, then it will assume the weights to be 1 / number of features if distance_transform is probability space or surprisal space, or otherwise 1. If weights is an assoc, then the parameter value_names will select the weights from the assoc. If weights is an assoc of assocs, then the parameter weights_selection_feature will select which set of weights to use and redistribute and normalize any probability masses for unused features to features that are used. The parameter distance_types is either a list strings or an assoc of strings indicating the type of distance for each feature. Allowed values are \"nominal_bool\", \"nominal_number\", \"nominal_string\", \"nominal_code\", \"continuous_number\", \"continuous_number_cyclic\", \"continuous_string\", \"continuous_code_no_recursive_matching\", and \"continuous_code\". Nominals evaluate whether the two values are the same and continuous evaluates the difference between the two values. The numeric, string, or code modifier specifies how the difference is measured, and cyclic means it is a difference that wraps around. \nFor attributes, the particular distance_types specifies what particular attributes are expected. For a nominal distance_type, a number indicates the nominal count, whereas null will infer from the values given. Cyclic requires a single value, which is the upper bound of the difference for the cycle range (e.g., if the value is 360, then the supremum difference between two values will be 360, leading 1 and 359 to have a difference of 2).\n Deviations are used during distance calculation to specify uncertainty per-element, the minimum difference between two values prior to exponentiation. Specifying null as a deviation is equivalent to setting each deviation to 0, unless distance_transform is \"surprisal_to_prob\", in which case it will attempt to infer a deviation. Each deviation for each feature can be a single value or a list. If it is a single value, that value is used as the deviation and differences and deviations for null values will automatically computed from the data based on the maximum difference. If a deviation is provided as a list, then the first value is the deviation, the second value is the difference to use when one of the values being compared is null, and the third value is the difference to use when both of the values are null. If the third value is omitted, it will use the second value for both. If both of the null values are omitted, then it will compute the maximum difference and use that for both. For nominal types, the value for each feature can be a numeric deviation, an assoc, or a list. If the value is an assoc it specifies deviation information, where each key of the assoc is the nominal value, and each value of the assoc can be a numeric deviation value, a list, or an assoc, with the list specifying either an assoc followed optionally by the default deviation. This inner assoc, regardless of whether it is in a list, maps the value to each actual value's deviation. The parameter entities_returned specifies either the number of entities to return, or is a list. If entities_returned is a list, the first element of the list specifies the minimum incremental probability or percent of mass that the next largest entity would comprise (e.g., 0.05 would return at most 20 entities if they were all equal in percent of mass), and the other elements are optional. The second element is the minimum number of entities to return, the third element is the maximum number of entities to return, and the fourth indicates the number of additional entities to include after any of the aforementioned thresholds (defaulting to zero). If there is disagreement among the constraints for entities_returned, the constraint yielding the fewest entities will govern the number of entities returned. The optional radius_label parameter represents the label name of the radius of the entity (if the radius is within the distance, the entity is selected). The optional numerical_precision represents one of three values: \"precise\", which computes every distance with high numerical precision, \"fast\", which computes every distance with lower but faster numerical precision, and \"recompute_precise\", which computes distances quickly with lower precision but then recomputes any distance values that will be returned with higher precision. If called last with compute_on_contained_entities, then it returns an assoc of the entity ids with their distances. A transform will be applied to these distances based on distance_transform. If distance_transform is \"surprisal\" then distances will be calculated as surprisals, and weights will not be applied to the values. If distance_transform is \"surprisal_to_prob\" then distances will be calculated as surprisals and will be transformed back into probabilities for aggregating, and then transformed back to surprisals. If distance_transform is a number or omitted, which will default to 1.0, then it will be treated as a distance weight exponent, and will be applied to each distance as distance^distance_weight_exponent, only using entity weights for nonpositive values of distance_transform. If entity_weight_label_name is specified, it will multiply the resulting value for each entity (after distance_weight_exponent, etc. have been applied) by the value in the label of entity_weight_label_name. If output_sorted_list is not specified or is false, then it will return an assoc of entity string id as the key with the distance as the value; if output_sorted_list is true, then it will return a list of lists, where the first list is the entity ids and the second list contains the corresponding distances, where both lists are in sorted order starting with the closest or most important (based on whether distance_weight_exponent is positive or negative respectively). If output_sorted_list is a string, then it will additionally return a list where the values correspond to the values of the labels for each respective entity. If output_sorted_list is a string, then it will additionally return a list where the values correspond to the values of the labels for each respective entity. If output_sorted_list is a list of strings, then it will additionally return a list of values for each of the label values for each respective entity.", "example" : "(contained_entities \"TestContainerExec\" (list (query_nearest_generalized_distance 3 (list \"x\" \"y\") (list 0.0 0.0) 0.01 (list 2 1) (list \"nominal_number\" \"continuous_number_cyclic\") (list 1 360) (null) (null) 1 (null) \"random seed 1234\" \"radius\") ) )" }, @@ -1645,7 +1645,7 @@ var data = [ "output" : "query", "new value" : "partial", "concurrency" : true, - "description" : "When used as a query argument, computes the distance or surprisal contribution for every case given by axis_value_lists. axis_value_lists specifies a list of lists, where each inner list is the set of values for each axis, and a distance contribution will be computed for each outer list. feature_labels specifies the names of the features to consider the during computation. The parameter p_value is the generalized norm parameter, where the value of 1 is probability space and Manhattan distance, the default, 2 being Euclidean distance, etc. The weights parameter specifies how to weight the different dimensions. If weights is a list, each value maps to its respective element in the vectors. If weights is null, then it will assume that the weights are 1. If weights is an assoc, then the parameter value_names will select the weights from the assoc. If weights is an assoc of assocs, then the parameter weights_selection_feature will select which set of weights to use and redistribute and normalize any probability masses for unused features to features that are used. The parameter distance_types is either a list strings or an assoc of strings indicating the type of distance for each feature. Allowed values are \"nominal_bool\", \"nominal_number\", \"nominal_string\", \"nominal_code\", \"continuous_number\", \"continuous_number_cyclic\", \"continuous_string\", \"continuous_code_no_recursive_matching\", and \"continuous_code\". Nominals evaluate whether the two values are the same and continuous evaluates the difference between the two values. The numeric, string, or code modifier specifies how the difference is measured, and cyclic means it is a difference that wraps around. \nFor attributes, the particular distance_types specifies what particular attributes are expected. For a nominal distance_type, a number indicates the nominal count, whereas null will infer from the values given. Cyclic requires a single value, which is the upper bound of the difference for the cycle range (e.g., if the value is 360, then the supremum difference between two values will be 360, leading 1 and 359 to have a difference of 2).\n Deviations are used during distance calculation to specify uncertainty per-element, the minimum difference between two values prior to exponentiation. Specifying null as a deviation is equivalent to setting each deviation to 0, unless distance_transform is \"surprisal\" or \"surprisal_to_prob\", in which case it will attempt to infer a deviation. Each deviation for each feature can be a single value or a list. If it is a single value, that value is used as the deviation and differences and deviations for null values will automatically computed from the data based on the maximum difference. If a deviation is provided as a list, then the first value is the deviation, the second value is the difference to use when one of the values being compared is null, and the third value is the difference to use when both of the values are null. If the third value is omitted, it will use the second value for both. If both of the null values are omitted, then it will compute the maximum difference and use that for both. For nominal types, the value for each feature can be a numeric deviation, an assoc, or a list. If the value is an assoc it specifies deviation information, where each key of the assoc is the nominal value, and each value of the assoc can be a numeric deviation value, a list, or an assoc, with the list specifying either an assoc followed optionally by the default deviation. This inner assoc, regardless of whether it is in a list, maps the value to each actual value's deviation. The parameter entities_returned specifies either the number of entities to return, or is a list. If entities_returned is a list, the first element of the list specifies the minimum incremental probability or percent of mass that the next largest entity would comprise (e.g., 0.05 would return at most 20 entities if they were all equal in percent of mass), and the other elements are optional. The second element is the minimum number of entities to return, the third element is the maximum number of entities to return, and the fourth indicates the number of additional entities to include after any of the aforementioned thresholds (defaulting to zero). If there is disagreement among the constraints for entities_returned, the constraint yielding the fewest entities will govern the number of entities returned. The optional radius_label parameter represents the label name of the radius of the entity (if the radius is within the distance, the entity is selected). The optional numerical_precision represents one of three values: \"precise\", which computes every distance with high numerical precision, \"fast\", which computes every distance with lower but faster numerical precision, and \"recompute_precise\", which computes distances quickly with lower precision but then recomputes any distance values that will be returned with higher precision. If called last with compute_on_contained_entities, then it returns an assoc of the entity ids with their convictions. A transform will be applied to these distances based on distance_transform. If distance_transform is \"surprisal\" then distances will be calculated as surprisals, and weights will not be applied to the values. If distance_transform is \"surprisal_to_prob\" then distances will be calculated as surprisals and will be transformed back into probabilities for aggregating, and then transformed back to surprisals. If distance_transform is a number or omitted, which will default to 1.0, then it will be used as a parameter for a generalized mean (e.g., -1 yields the harmonic mean) to average the distances, only using entity weights for nonpositive values of distance_transform. If entity_weight_label_name is specified, it will multiply the resulting value for each entity (after distance_weight_exponent, etc. have been applied) by the value in the label of entity_weight_label_name. If output_sorted_list is not specified or is false, then it will return an assoc of entity string id as the key with the distance as the value; if output_sorted_list is true, then it will return a list of lists, where the first list is the entity ids and the second list contains the corresponding distances, where both lists are in sorted order starting with the closest or most important (based on whether distance_weight_exponent is positive or negative respectively). If output_sorted_list is a string, then it will additionally return a list where the values correspond to the values of the labels for each respective entity. If output_sorted_list is a list of strings, then it will additionally return a list of values for each of the label values for each respective entity.", + "description" : "When used as a query argument, computes the distance or surprisal contribution for every case given by axis_value_lists. axis_value_lists specifies a list of lists, where each inner list is the set of values for each axis, and a distance contribution will be computed for each outer list. feature_labels specifies the names of the features to consider the during computation. The parameter p_value is the generalized norm parameter, where the value of 1 is probability space and Manhattan distance, the default, 2 being Euclidean distance, etc. The weights parameter specifies how to weight the different dimensions. If weights is a list, each value maps to its respective element in the vectors. If weights is null, then it will assume the weights to be 1 / number of features if distance_transform is probability space or surprisal space, or otherwise 1. If weights is an assoc, then the parameter value_names will select the weights from the assoc. If weights is an assoc of assocs, then the parameter weights_selection_feature will select which set of weights to use and redistribute and normalize any probability masses for unused features to features that are used. The parameter distance_types is either a list strings or an assoc of strings indicating the type of distance for each feature. Allowed values are \"nominal_bool\", \"nominal_number\", \"nominal_string\", \"nominal_code\", \"continuous_number\", \"continuous_number_cyclic\", \"continuous_string\", \"continuous_code_no_recursive_matching\", and \"continuous_code\". Nominals evaluate whether the two values are the same and continuous evaluates the difference between the two values. The numeric, string, or code modifier specifies how the difference is measured, and cyclic means it is a difference that wraps around. \nFor attributes, the particular distance_types specifies what particular attributes are expected. For a nominal distance_type, a number indicates the nominal count, whereas null will infer from the values given. Cyclic requires a single value, which is the upper bound of the difference for the cycle range (e.g., if the value is 360, then the supremum difference between two values will be 360, leading 1 and 359 to have a difference of 2).\n Deviations are used during distance calculation to specify uncertainty per-element, the minimum difference between two values prior to exponentiation. Specifying null as a deviation is equivalent to setting each deviation to 0, unless distance_transform is \"surprisal\" or \"surprisal_to_prob\", in which case it will attempt to infer a deviation. Each deviation for each feature can be a single value or a list. If it is a single value, that value is used as the deviation and differences and deviations for null values will automatically computed from the data based on the maximum difference. If a deviation is provided as a list, then the first value is the deviation, the second value is the difference to use when one of the values being compared is null, and the third value is the difference to use when both of the values are null. If the third value is omitted, it will use the second value for both. If both of the null values are omitted, then it will compute the maximum difference and use that for both. For nominal types, the value for each feature can be a numeric deviation, an assoc, or a list. If the value is an assoc it specifies deviation information, where each key of the assoc is the nominal value, and each value of the assoc can be a numeric deviation value, a list, or an assoc, with the list specifying either an assoc followed optionally by the default deviation. This inner assoc, regardless of whether it is in a list, maps the value to each actual value's deviation. The parameter entities_returned specifies either the number of entities to return, or is a list. If entities_returned is a list, the first element of the list specifies the minimum incremental probability or percent of mass that the next largest entity would comprise (e.g., 0.05 would return at most 20 entities if they were all equal in percent of mass), and the other elements are optional. The second element is the minimum number of entities to return, the third element is the maximum number of entities to return, and the fourth indicates the number of additional entities to include after any of the aforementioned thresholds (defaulting to zero). If there is disagreement among the constraints for entities_returned, the constraint yielding the fewest entities will govern the number of entities returned. The optional radius_label parameter represents the label name of the radius of the entity (if the radius is within the distance, the entity is selected). The optional numerical_precision represents one of three values: \"precise\", which computes every distance with high numerical precision, \"fast\", which computes every distance with lower but faster numerical precision, and \"recompute_precise\", which computes distances quickly with lower precision but then recomputes any distance values that will be returned with higher precision. If called last with compute_on_contained_entities, then it returns an assoc of the entity ids with their convictions. A transform will be applied to these distances based on distance_transform. If distance_transform is \"surprisal\" then distances will be calculated as surprisals, and weights will not be applied to the values. If distance_transform is \"surprisal_to_prob\" then distances will be calculated as surprisals and will be transformed back into probabilities for aggregating, and then transformed back to surprisals. If distance_transform is a number or omitted, which will default to 1.0, then it will be used as a parameter for a generalized mean (e.g., -1 yields the harmonic mean) to average the distances, only using entity weights for nonpositive values of distance_transform. If entity_weight_label_name is specified, it will multiply the resulting value for each entity (after distance_weight_exponent, etc. have been applied) by the value in the label of entity_weight_label_name. If output_sorted_list is not specified or is false, then it will return an assoc of entity string id as the key with the distance as the value; if output_sorted_list is true, then it will return a list of lists, where the first list is the entity ids and the second list contains the corresponding distances, where both lists are in sorted order starting with the closest or most important (based on whether distance_weight_exponent is positive or negative respectively). If output_sorted_list is a string, then it will additionally return a list where the values correspond to the values of the labels for each respective entity. If output_sorted_list is a list of strings, then it will additionally return a list of values for each of the label values for each respective entity.", "example" : "(compute_on_contained_entities \"SurprisalTransformContainer\" (list (query_distance_contributions 4 (list \"x\") [[0]] 1 (null) (null) (null) (list 0.25) (null) \"surprisal\" (null) \"fixed_seed\" (null) \"precise\") ))" }, @@ -1654,7 +1654,7 @@ var data = [ "output" : "query", "new value" : "partial", "concurrency" : true, - "description" : "When used as a query argument, computes the nearest neighbors to every case given by axis_value_lists, normalizes their influence weights, and accumulates the entity's total influence weights relative to every other case. It returns a list of all cases whose cumulative neighbor values are greater than zero. axis_value_lists specifies a list of lists, where each inner list is the set of values for each axis, and a distance contribution will be computed for each outer list. feature_labels specifies the names of the features to consider the during computation. The parameter p_value is the generalized norm parameter, where the value of 1 is probability space and Manhattan distance, the default, 2 being Euclidean distance, etc. The weights parameter specifies how to weight the different dimensions. If weights is a list, each value maps to its respective element in the vectors. If weights is null, then it will assume that the weights are 1. If weights is an assoc, then the parameter value_names will select the weights from the assoc. If weights is an assoc of assocs, then the parameter weights_selection_feature will select which set of weights to use and redistribute and normalize any probability masses for unused features to features that are used. The parameter distance_types is either a list strings or an assoc of strings indicating the type of distance for each feature. Allowed values are \"nominal_bool\", \"nominal_number\", \"nominal_string\", \"nominal_code\", \"continuous_number\", \"continuous_number_cyclic\", \"continuous_string\", \"continuous_code_no_recursive_matching\", and \"continuous_code\". Nominals evaluate whether the two values are the same and continuous evaluates the difference between the two values. The numeric, string, or code modifier specifies how the difference is measured, and cyclic means it is a difference that wraps around. \nFor attributes, the particular distance_types specifies what particular attributes are expected. For a nominal distance_type, a number indicates the nominal count, whereas null will infer from the values given. Cyclic requires a single value, which is the upper bound of the difference for the cycle range (e.g., if the value is 360, then the supremum difference between two values will be 360, leading 1 and 359 to have a difference of 2).\n Deviations are used during distance calculation to specify uncertainty per-element, the minimum difference between two values prior to exponentiation. Specifying null as a deviation is equivalent to setting each deviation to 0, unless distance_transform is \"surprisal\" or \"surprisal_to_prob\", in which case it will attempt to infer a deviation. Each deviation for each feature can be a single value or a list. If it is a single value, that value is used as the deviation and differences and deviations for null values will automatically computed from the data based on the maximum difference. If a deviation is provided as a list, then the first value is the deviation, the second value is the difference to use when one of the values being compared is null, and the third value is the difference to use when both of the values are null. If the third value is omitted, it will use the second value for both. If both of the null values are omitted, then it will compute the maximum difference and use that for both. For nominal types, the value for each feature can be a numeric deviation, an assoc, or a list. If the value is an assoc it specifies deviation information, where each key of the assoc is the nominal value, and each value of the assoc can be a numeric deviation value, a list, or an assoc, with the list specifying either an assoc followed optionally by the default deviation. This inner assoc, regardless of whether it is in a list, maps the value to each actual value's deviation. The parameter entities_returned specifies either the number of entities to return, or is a list. If entities_returned is a list, the first element of the list specifies the minimum incremental probability or percent of mass that the next largest entity would comprise (e.g., 0.05 would return at most 20 entities if they were all equal in percent of mass), and the other elements are optional. The second element is the minimum number of entities to return, the third element is the maximum number of entities to return, and the fourth indicates the number of additional entities to include after any of the aforementioned thresholds (defaulting to zero). If there is disagreement among the constraints for entities_returned, the constraint yielding the fewest entities will govern the number of entities returned. The optional radius_label parameter represents the label name of the radius of the entity (if the radius is within the distance, the entity is selected). The optional numerical_precision represents one of three values: \"precise\", which computes every distance with high numerical precision, \"fast\", which computes every distance with lower but faster numerical precision, and \"recompute_precise\", which computes distances quickly with lower precision but then recomputes any distance values that will be returned with higher precision. If called last with compute_on_contained_entities, then it returns an assoc of the entity ids with their convictions. A transform will be applied to these distances based on distance_transform. If distance_transform is \"surprisal\" then distances will be calculated as surprisals, and weights will not be applied to the values. If distance_transform is \"surprisal_to_prob\" then distances will be calculated as surprisals and will be transformed back into probabilities for aggregating, and then transformed back to surprisals. If distance_transform is a number or omitted, which will default to 1.0, then it will be used as a parameter for a generalized mean (e.g., -1 yields the harmonic mean) to average the distances, only using entity weights for nonpositive values of distance_transform. If entity_weight_label_name is specified, it will multiply the resulting value for each entity (after distance_weight_exponent, etc. have been applied) by the value in the label of entity_weight_label_name. If output_sorted_list is not specified or is false, then it will return an assoc of entity string id as the key with the distance as the value; if output_sorted_list is true, then it will return a list of lists, where the first list is the entity ids and the second list contains the corresponding distances, where both lists are in sorted order starting with the closest or most important (based on whether distance_weight_exponent is positive or negative respectively). If output_sorted_list is a string, then it will additionally return a list where the values correspond to the values of the labels for each respective entity. If output_sorted_list is a list of strings, then it will additionally return a list of values for each of the label values for each respective entity.", + "description" : "When used as a query argument, computes the nearest neighbors to every case given by axis_value_lists, normalizes their influence weights, and accumulates the entity's total influence weights relative to every other case. It returns a list of all cases whose cumulative neighbor values are greater than zero. axis_value_lists specifies a list of lists, where each inner list is the set of values for each axis, and a distance contribution will be computed for each outer list. feature_labels specifies the names of the features to consider the during computation. The parameter p_value is the generalized norm parameter, where the value of 1 is probability space and Manhattan distance, the default, 2 being Euclidean distance, etc. The weights parameter specifies how to weight the different dimensions. If weights is a list, each value maps to its respective element in the vectors. If weights is null, then it will assume the weights to be 1 / number of features if distance_transform is probability space or surprisal space, or otherwise 1. If weights is an assoc, then the parameter value_names will select the weights from the assoc. If weights is an assoc of assocs, then the parameter weights_selection_feature will select which set of weights to use and redistribute and normalize any probability masses for unused features to features that are used. The parameter distance_types is either a list strings or an assoc of strings indicating the type of distance for each feature. Allowed values are \"nominal_bool\", \"nominal_number\", \"nominal_string\", \"nominal_code\", \"continuous_number\", \"continuous_number_cyclic\", \"continuous_string\", \"continuous_code_no_recursive_matching\", and \"continuous_code\". Nominals evaluate whether the two values are the same and continuous evaluates the difference between the two values. The numeric, string, or code modifier specifies how the difference is measured, and cyclic means it is a difference that wraps around. \nFor attributes, the particular distance_types specifies what particular attributes are expected. For a nominal distance_type, a number indicates the nominal count, whereas null will infer from the values given. Cyclic requires a single value, which is the upper bound of the difference for the cycle range (e.g., if the value is 360, then the supremum difference between two values will be 360, leading 1 and 359 to have a difference of 2).\n Deviations are used during distance calculation to specify uncertainty per-element, the minimum difference between two values prior to exponentiation. Specifying null as a deviation is equivalent to setting each deviation to 0, unless distance_transform is \"surprisal\" or \"surprisal_to_prob\", in which case it will attempt to infer a deviation. Each deviation for each feature can be a single value or a list. If it is a single value, that value is used as the deviation and differences and deviations for null values will automatically computed from the data based on the maximum difference. If a deviation is provided as a list, then the first value is the deviation, the second value is the difference to use when one of the values being compared is null, and the third value is the difference to use when both of the values are null. If the third value is omitted, it will use the second value for both. If both of the null values are omitted, then it will compute the maximum difference and use that for both. For nominal types, the value for each feature can be a numeric deviation, an assoc, or a list. If the value is an assoc it specifies deviation information, where each key of the assoc is the nominal value, and each value of the assoc can be a numeric deviation value, a list, or an assoc, with the list specifying either an assoc followed optionally by the default deviation. This inner assoc, regardless of whether it is in a list, maps the value to each actual value's deviation. The parameter entities_returned specifies either the number of entities to return, or is a list. If entities_returned is a list, the first element of the list specifies the minimum incremental probability or percent of mass that the next largest entity would comprise (e.g., 0.05 would return at most 20 entities if they were all equal in percent of mass), and the other elements are optional. The second element is the minimum number of entities to return, the third element is the maximum number of entities to return, and the fourth indicates the number of additional entities to include after any of the aforementioned thresholds (defaulting to zero). If there is disagreement among the constraints for entities_returned, the constraint yielding the fewest entities will govern the number of entities returned. The optional radius_label parameter represents the label name of the radius of the entity (if the radius is within the distance, the entity is selected). The optional numerical_precision represents one of three values: \"precise\", which computes every distance with high numerical precision, \"fast\", which computes every distance with lower but faster numerical precision, and \"recompute_precise\", which computes distances quickly with lower precision but then recomputes any distance values that will be returned with higher precision. If called last with compute_on_contained_entities, then it returns an assoc of the entity ids with their convictions. A transform will be applied to these distances based on distance_transform. If distance_transform is \"surprisal\" then distances will be calculated as surprisals, and weights will not be applied to the values. If distance_transform is \"surprisal_to_prob\" then distances will be calculated as surprisals and will be transformed back into probabilities for aggregating, and then transformed back to surprisals. If distance_transform is a number or omitted, which will default to 1.0, then it will be used as a parameter for a generalized mean (e.g., -1 yields the harmonic mean) to average the distances, only using entity weights for nonpositive values of distance_transform. If entity_weight_label_name is specified, it will multiply the resulting value for each entity (after distance_weight_exponent, etc. have been applied) by the value in the label of entity_weight_label_name. If output_sorted_list is not specified or is false, then it will return an assoc of entity string id as the key with the distance as the value; if output_sorted_list is true, then it will return a list of lists, where the first list is the entity ids and the second list contains the corresponding distances, where both lists are in sorted order starting with the closest or most important (based on whether distance_weight_exponent is positive or negative respectively). If output_sorted_list is a string, then it will additionally return a list where the values correspond to the values of the labels for each respective entity. If output_sorted_list is a list of strings, then it will additionally return a list of values for each of the label values for each respective entity.", "example" : "(compute_on_contained_entities \"SurprisalTransformContainer\" (list (query_cumulative_nearest_entity_weights 4 (list \"x\") [[0]] 1 (null) (null) (null) (list 0.25) (null) \"surprisal\" (null) \"fixed_seed\" (null) \"precise\") ))" }, @@ -1663,7 +1663,7 @@ var data = [ "output" : "query", "new value" : "partial", "concurrency" : true, - "description" : "When used as a query argument, computes the case conviction for every case given in case_ids_to_compute with respect to *all* cases in the contained entities set input during a query. If case_ids_to_compute is null or an empty list, case conviction is computed for all cases. feature_labels specifies the names of the features to consider the during computation. The parameter p_value is the generalized norm parameter, where the value of 1 is probability space and Manhattan distance, the default, 2 being Euclidean distance, etc. The weights parameter specifies how to weight the different dimensions. If weights is a list, each value maps to its respective element in the vectors. If weights is null, then it will assume that the weights are 1. If weights is an assoc, then the parameter value_names will select the weights from the assoc. If weights is an assoc of assocs, then the parameter weights_selection_feature will select which set of weights to use and redistribute and normalize any probability masses for unused features to features that are used. The parameter distance_types is either a list strings or an assoc of strings indicating the type of distance for each feature. Allowed values are \"nominal_bool\", \"nominal_number\", \"nominal_string\", \"nominal_code\", \"continuous_number\", \"continuous_number_cyclic\", \"continuous_string\", \"continuous_code_no_recursive_matching\", and \"continuous_code\". Nominals evaluate whether the two values are the same and continuous evaluates the difference between the two values. The numeric, string, or code modifier specifies how the difference is measured, and cyclic means it is a difference that wraps around. \nFor attributes, the particular distance_types specifies what particular attributes are expected. For a nominal distance_type, a number indicates the nominal count, whereas null will infer from the values given. Cyclic requires a single value, which is the upper bound of the difference for the cycle range (e.g., if the value is 360, then the supremum difference between two values will be 360, leading 1 and 359 to have a difference of 2).\n Deviations are used during distance calculation to specify uncertainty per-element, the minimum difference between two values prior to exponentiation. Specifying null as a deviation is equivalent to setting each deviation to 0, unless distance_transform is \"surprisal\" or \"surprisal_to_prob\", in which case it will attempt to infer a deviation. Each deviation for each feature can be a single value or a list. If it is a single value, that value is used as the deviation and differences and deviations for null values will automatically computed from the data based on the maximum difference. If a deviation is provided as a list, then the first value is the deviation, the second value is the difference to use when one of the values being compared is null, and the third value is the difference to use when both of the values are null. If the third value is omitted, it will use the second value for both. If both of the null values are omitted, then it will compute the maximum difference and use that for both. For nominal types, the value for each feature can be a numeric deviation, an assoc, or a list. If the value is an assoc it specifies deviation information, where each key of the assoc is the nominal value, and each value of the assoc can be a numeric deviation value, a list, or an assoc, with the list specifying either an assoc followed optionally by the default deviation. This inner assoc, regardless of whether it is in a list, maps the value to each actual value's deviation. The parameter entities_returned specifies either the number of entities to return, or is a list. If entities_returned is a list, the first element of the list specifies the minimum incremental probability or percent of mass that the next largest entity would comprise (e.g., 0.05 would return at most 20 entities if they were all equal in percent of mass), and the other elements are optional. The second element is the minimum number of entities to return, the third element is the maximum number of entities to return, and the fourth indicates the number of additional entities to include after any of the aforementioned thresholds (defaulting to zero). If there is disagreement among the constraints for entities_returned, the constraint yielding the fewest entities will govern the number of entities returned. The optional radius_label parameter represents the label name of the radius of the entity (if the radius is within the distance, the entity is selected). The optional numerical_precision represents one of three values: \"precise\", which computes every distance with high numerical precision, \"fast\", which computes every distance with lower but faster numerical precision, and \"recompute_precise\", which computes distances quickly with lower precision but then recomputes any distance values that will be returned with higher precision. If called last with compute_on_contained_entities, then it returns an assoc of the entity ids with their convictions. A transform will be applied to these distances based on distance_transform. If distance_transform is \"surprisal\" then distances will be calculated as surprisals, and weights will not be applied to the values. If distance_transform is \"surprisal_to_prob\" then distances will be calculated as surprisals and will be transformed back into probabilities for aggregating, and then transformed back to surprisals. If distance_transform is a number or omitted, which will default to 1.0, then it will be used as a parameter for a generalized mean (e.g., -1 yields the harmonic mean) to average the distances, only using entity weights for nonpositive values of distance_transform. If entity_weight_label_name is specified, it will multiply the resulting value for each entity (after distance_weight_exponent, etc. have been applied) by the value in the label of entity_weight_label_name. If conviction_of_removal is true, then it will compute the conviction as if the entities specified by entity_ids_to_compute were removed; if false (the default), then will compute the conviction as if those entities were added or included. If output_sorted_list is not specified or is false, then it will return an assoc of entity string id as the key with the distance as the value; if output_sorted_list is true, then it will return a list of lists, where the first list is the entity ids and the second list contains the corresponding distances, where both lists are in sorted order starting with the closest or most important (based on whether distance_weight_exponent is positive or negative respectively). If output_sorted_list is a string, then it will additionally return a list where the values correspond to the values of the labels for each respective entity. If output_sorted_list is a list of strings, then it will additionally return a list of values for each of the label values for each respective entity.", + "description" : "When used as a query argument, computes the case conviction for every case given in case_ids_to_compute with respect to *all* cases in the contained entities set input during a query. If case_ids_to_compute is null or an empty list, case conviction is computed for all cases. feature_labels specifies the names of the features to consider the during computation. The parameter p_value is the generalized norm parameter, where the value of 1 is probability space and Manhattan distance, the default, 2 being Euclidean distance, etc. The weights parameter specifies how to weight the different dimensions. If weights is a list, each value maps to its respective element in the vectors. If weights is null, then it will assume the weights to be 1 / number of features if distance_transform is probability space or surprisal space, or otherwise 1. If weights is an assoc, then the parameter value_names will select the weights from the assoc. If weights is an assoc of assocs, then the parameter weights_selection_feature will select which set of weights to use and redistribute and normalize any probability masses for unused features to features that are used. The parameter distance_types is either a list strings or an assoc of strings indicating the type of distance for each feature. Allowed values are \"nominal_bool\", \"nominal_number\", \"nominal_string\", \"nominal_code\", \"continuous_number\", \"continuous_number_cyclic\", \"continuous_string\", \"continuous_code_no_recursive_matching\", and \"continuous_code\". Nominals evaluate whether the two values are the same and continuous evaluates the difference between the two values. The numeric, string, or code modifier specifies how the difference is measured, and cyclic means it is a difference that wraps around. \nFor attributes, the particular distance_types specifies what particular attributes are expected. For a nominal distance_type, a number indicates the nominal count, whereas null will infer from the values given. Cyclic requires a single value, which is the upper bound of the difference for the cycle range (e.g., if the value is 360, then the supremum difference between two values will be 360, leading 1 and 359 to have a difference of 2).\n Deviations are used during distance calculation to specify uncertainty per-element, the minimum difference between two values prior to exponentiation. Specifying null as a deviation is equivalent to setting each deviation to 0, unless distance_transform is \"surprisal\" or \"surprisal_to_prob\", in which case it will attempt to infer a deviation. Each deviation for each feature can be a single value or a list. If it is a single value, that value is used as the deviation and differences and deviations for null values will automatically computed from the data based on the maximum difference. If a deviation is provided as a list, then the first value is the deviation, the second value is the difference to use when one of the values being compared is null, and the third value is the difference to use when both of the values are null. If the third value is omitted, it will use the second value for both. If both of the null values are omitted, then it will compute the maximum difference and use that for both. For nominal types, the value for each feature can be a numeric deviation, an assoc, or a list. If the value is an assoc it specifies deviation information, where each key of the assoc is the nominal value, and each value of the assoc can be a numeric deviation value, a list, or an assoc, with the list specifying either an assoc followed optionally by the default deviation. This inner assoc, regardless of whether it is in a list, maps the value to each actual value's deviation. The parameter entities_returned specifies either the number of entities to return, or is a list. If entities_returned is a list, the first element of the list specifies the minimum incremental probability or percent of mass that the next largest entity would comprise (e.g., 0.05 would return at most 20 entities if they were all equal in percent of mass), and the other elements are optional. The second element is the minimum number of entities to return, the third element is the maximum number of entities to return, and the fourth indicates the number of additional entities to include after any of the aforementioned thresholds (defaulting to zero). If there is disagreement among the constraints for entities_returned, the constraint yielding the fewest entities will govern the number of entities returned. The optional radius_label parameter represents the label name of the radius of the entity (if the radius is within the distance, the entity is selected). The optional numerical_precision represents one of three values: \"precise\", which computes every distance with high numerical precision, \"fast\", which computes every distance with lower but faster numerical precision, and \"recompute_precise\", which computes distances quickly with lower precision but then recomputes any distance values that will be returned with higher precision. If called last with compute_on_contained_entities, then it returns an assoc of the entity ids with their convictions. A transform will be applied to these distances based on distance_transform. If distance_transform is \"surprisal\" then distances will be calculated as surprisals, and weights will not be applied to the values. If distance_transform is \"surprisal_to_prob\" then distances will be calculated as surprisals and will be transformed back into probabilities for aggregating, and then transformed back to surprisals. If distance_transform is a number or omitted, which will default to 1.0, then it will be used as a parameter for a generalized mean (e.g., -1 yields the harmonic mean) to average the distances, only using entity weights for nonpositive values of distance_transform. If entity_weight_label_name is specified, it will multiply the resulting value for each entity (after distance_weight_exponent, etc. have been applied) by the value in the label of entity_weight_label_name. If conviction_of_removal is true, then it will compute the conviction as if the entities specified by entity_ids_to_compute were removed; if false (the default), then will compute the conviction as if those entities were added or included. If output_sorted_list is not specified or is false, then it will return an assoc of entity string id as the key with the distance as the value; if output_sorted_list is true, then it will return a list of lists, where the first list is the entity ids and the second list contains the corresponding distances, where both lists are in sorted order starting with the closest or most important (based on whether distance_weight_exponent is positive or negative respectively). If output_sorted_list is a string, then it will additionally return a list where the values correspond to the values of the labels for each respective entity. If output_sorted_list is a list of strings, then it will additionally return a list of values for each of the label values for each respective entity.", "example" : "(compute_on_contained_entities (list (query_entity_convictions 2 (list \"alpha\" \"b\" \"c\") (null) 0.1 (null) (list 0 0 1) ) ))" }, @@ -1672,7 +1672,7 @@ var data = [ "output" : "query", "new value" : "partial", "concurrency" : true, - "description" : "When used as a query argument, computes the case kl divergence for every case given in case_ids_to_compute as a group with respect to *all* cases in the contained entities set input during a query. If case_ids_to_compute is null or an empty list, case conviction is computed for all cases. feature_labels specifies the names of the features to consider the during computation. The parameter p_value is the generalized norm parameter, where the value of 1 is probability space and Manhattan distance, the default, 2 being Euclidean distance, etc. The weights parameter specifies how to weight the different dimensions. If weights is a list, each value maps to its respective element in the vectors. If weights is null, then it will assume that the weights are 1. If weights is an assoc, then the parameter value_names will select the weights from the assoc. If weights is an assoc of assocs, then the parameter weights_selection_feature will select which set of weights to use and redistribute and normalize any probability masses for unused features to features that are used. The parameter distance_types is either a list strings or an assoc of strings indicating the type of distance for each feature. Allowed values are \"nominal_bool\", \"nominal_number\", \"nominal_string\", \"nominal_code\", \"continuous_number\", \"continuous_number_cyclic\", \"continuous_string\", \"continuous_code_no_recursive_matching\", and \"continuous_code\". Nominals evaluate whether the two values are the same and continuous evaluates the difference between the two values. The numeric, string, or code modifier specifies how the difference is measured, and cyclic means it is a difference that wraps around. \nFor attributes, the particular distance_types specifies what particular attributes are expected. For a nominal distance_type, a number indicates the nominal count, whereas null will infer from the values given. Cyclic requires a single value, which is the upper bound of the difference for the cycle range (e.g., if the value is 360, then the supremum difference between two values will be 360, leading 1 and 359 to have a difference of 2).\n Deviations are used during distance calculation to specify uncertainty per-element, the minimum difference between two values prior to exponentiation. Specifying null as a deviation is equivalent to setting each deviation to 0, unless distance_transform is \"surprisal\" or \"surprisal_to_prob\", in which case it will attempt to infer a deviation. Each deviation for each feature can be a single value or a list. If it is a single value, that value is used as the deviation and differences and deviations for null values will automatically computed from the data based on the maximum difference. If a deviation is provided as a list, then the first value is the deviation, the second value is the difference to use when one of the values being compared is null, and the third value is the difference to use when both of the values are null. If the third value is omitted, it will use the second value for both. If both of the null values are omitted, then it will compute the maximum difference and use that for both. For nominal types, the value for each feature can be a numeric deviation, an assoc, or a list. If the value is an assoc it specifies deviation information, where each key of the assoc is the nominal value, and each value of the assoc can be a numeric deviation value, a list, or an assoc, with the list specifying either an assoc followed optionally by the default deviation. This inner assoc, regardless of whether it is in a list, maps the value to each actual value's deviation. The parameter entities_returned specifies either the number of entities to return, or is a list. If entities_returned is a list, the first element of the list specifies the minimum incremental probability or percent of mass that the next largest entity would comprise (e.g., 0.05 would return at most 20 entities if they were all equal in percent of mass), and the other elements are optional. The second element is the minimum number of entities to return, the third element is the maximum number of entities to return, and the fourth indicates the number of additional entities to include after any of the aforementioned thresholds (defaulting to zero). If there is disagreement among the constraints for entities_returned, the constraint yielding the fewest entities will govern the number of entities returned. The optional radius_label parameter represents the label name of the radius of the entity (if the radius is within the distance, the entity is selected). The optional numerical_precision represents one of three values: \"precise\", which computes every distance with high numerical precision, \"fast\", which computes every distance with lower but faster numerical precision, and \"recompute_precise\", which computes distances quickly with lower precision but then recomputes any distance values that will be returned with higher precision. If called last with compute_on_contained_entities, then it returns an assoc of the entity ids with their convictions. A transform will be applied to these distances based on distance_transform. If distance_transform is \"surprisal\" then distances will be calculated as surprisals, and weights will not be applied to the values. If distance_transform is \"surprisal_to_prob\" then distances will be calculated as surprisals and will be transformed back into probabilities for aggregating, and then transformed back to surprisals. If distance_transform is a number or omitted, which will default to 1.0, then it will be used as a parameter for a generalized mean (e.g., -1 yields the harmonic mean) to average the distances, only using entity weights for nonpositive values of distance_transform. If entity_weight_label_name is specified, it will multiply the resulting value for each entity (after distance_weight_exponent, etc. have been applied) by the value in the label of entity_weight_label_name. If conviction_of_removal is true, then it will compute the conviction as if the entities specified by entity_ids_to_compute were removed; if false (the default), then will compute the conviction as if those entities were added or included.", + "description" : "When used as a query argument, computes the case kl divergence for every case given in case_ids_to_compute as a group with respect to *all* cases in the contained entities set input during a query. If case_ids_to_compute is null or an empty list, case conviction is computed for all cases. feature_labels specifies the names of the features to consider the during computation. The parameter p_value is the generalized norm parameter, where the value of 1 is probability space and Manhattan distance, the default, 2 being Euclidean distance, etc. The weights parameter specifies how to weight the different dimensions. If weights is a list, each value maps to its respective element in the vectors. If weights is null, then it will assume the weights to be 1 / number of features if distance_transform is probability space or surprisal space, or otherwise 1. If weights is an assoc, then the parameter value_names will select the weights from the assoc. If weights is an assoc of assocs, then the parameter weights_selection_feature will select which set of weights to use and redistribute and normalize any probability masses for unused features to features that are used. The parameter distance_types is either a list strings or an assoc of strings indicating the type of distance for each feature. Allowed values are \"nominal_bool\", \"nominal_number\", \"nominal_string\", \"nominal_code\", \"continuous_number\", \"continuous_number_cyclic\", \"continuous_string\", \"continuous_code_no_recursive_matching\", and \"continuous_code\". Nominals evaluate whether the two values are the same and continuous evaluates the difference between the two values. The numeric, string, or code modifier specifies how the difference is measured, and cyclic means it is a difference that wraps around. \nFor attributes, the particular distance_types specifies what particular attributes are expected. For a nominal distance_type, a number indicates the nominal count, whereas null will infer from the values given. Cyclic requires a single value, which is the upper bound of the difference for the cycle range (e.g., if the value is 360, then the supremum difference between two values will be 360, leading 1 and 359 to have a difference of 2).\n Deviations are used during distance calculation to specify uncertainty per-element, the minimum difference between two values prior to exponentiation. Specifying null as a deviation is equivalent to setting each deviation to 0, unless distance_transform is \"surprisal\" or \"surprisal_to_prob\", in which case it will attempt to infer a deviation. Each deviation for each feature can be a single value or a list. If it is a single value, that value is used as the deviation and differences and deviations for null values will automatically computed from the data based on the maximum difference. If a deviation is provided as a list, then the first value is the deviation, the second value is the difference to use when one of the values being compared is null, and the third value is the difference to use when both of the values are null. If the third value is omitted, it will use the second value for both. If both of the null values are omitted, then it will compute the maximum difference and use that for both. For nominal types, the value for each feature can be a numeric deviation, an assoc, or a list. If the value is an assoc it specifies deviation information, where each key of the assoc is the nominal value, and each value of the assoc can be a numeric deviation value, a list, or an assoc, with the list specifying either an assoc followed optionally by the default deviation. This inner assoc, regardless of whether it is in a list, maps the value to each actual value's deviation. The parameter entities_returned specifies either the number of entities to return, or is a list. If entities_returned is a list, the first element of the list specifies the minimum incremental probability or percent of mass that the next largest entity would comprise (e.g., 0.05 would return at most 20 entities if they were all equal in percent of mass), and the other elements are optional. The second element is the minimum number of entities to return, the third element is the maximum number of entities to return, and the fourth indicates the number of additional entities to include after any of the aforementioned thresholds (defaulting to zero). If there is disagreement among the constraints for entities_returned, the constraint yielding the fewest entities will govern the number of entities returned. The optional radius_label parameter represents the label name of the radius of the entity (if the radius is within the distance, the entity is selected). The optional numerical_precision represents one of three values: \"precise\", which computes every distance with high numerical precision, \"fast\", which computes every distance with lower but faster numerical precision, and \"recompute_precise\", which computes distances quickly with lower precision but then recomputes any distance values that will be returned with higher precision. If called last with compute_on_contained_entities, then it returns an assoc of the entity ids with their convictions. A transform will be applied to these distances based on distance_transform. If distance_transform is \"surprisal\" then distances will be calculated as surprisals, and weights will not be applied to the values. If distance_transform is \"surprisal_to_prob\" then distances will be calculated as surprisals and will be transformed back into probabilities for aggregating, and then transformed back to surprisals. If distance_transform is a number or omitted, which will default to 1.0, then it will be used as a parameter for a generalized mean (e.g., -1 yields the harmonic mean) to average the distances, only using entity weights for nonpositive values of distance_transform. If entity_weight_label_name is specified, it will multiply the resulting value for each entity (after distance_weight_exponent, etc. have been applied) by the value in the label of entity_weight_label_name. If conviction_of_removal is true, then it will compute the conviction as if the entities specified by entity_ids_to_compute were removed; if false (the default), then will compute the conviction as if those entities were added or included.", "example" : "(compute_on_contained_entities (list (query_entity_group_kl_divergence 2 (list \"x\" \"y\") obj2_verts 2 (null) (null) (null) (null) (null) -1 (null) \"random seed 1234\") ))" }, @@ -1681,7 +1681,7 @@ var data = [ "output" : "query", "new value" : "partial", "concurrency" : true, - "description" : "When used as a query argument, computes the case conviction for every case given in case_ids_to_compute with respect to *all* cases in the contained entities set input during a query. If case_ids_to_compute is null or an empty list, case conviction is computed for all cases. feature_labels specifies the names of the features to consider the during computation. The parameter p_value is the generalized norm parameter, where the value of 1 is probability space and Manhattan distance, the default, 2 being Euclidean distance, etc. The weights parameter specifies how to weight the different dimensions. If weights is a list, each value maps to its respective element in the vectors. If weights is null, then it will assume that the weights are 1. If weights is an assoc, then the parameter value_names will select the weights from the assoc. If weights is an assoc of assocs, then the parameter weights_selection_feature will select which set of weights to use and redistribute and normalize any probability masses for unused features to features that are used. The parameter distance_types is either a list strings or an assoc of strings indicating the type of distance for each feature. Allowed values are \"nominal_bool\", \"nominal_number\", \"nominal_string\", \"nominal_code\", \"continuous_number\", \"continuous_number_cyclic\", \"continuous_string\", \"continuous_code_no_recursive_matching\", and \"continuous_code\". Nominals evaluate whether the two values are the same and continuous evaluates the difference between the two values. The numeric, string, or code modifier specifies how the difference is measured, and cyclic means it is a difference that wraps around. \nFor attributes, the particular distance_types specifies what particular attributes are expected. For a nominal distance_type, a number indicates the nominal count, whereas null will infer from the values given. Cyclic requires a single value, which is the upper bound of the difference for the cycle range (e.g., if the value is 360, then the supremum difference between two values will be 360, leading 1 and 359 to have a difference of 2).\n Deviations are used during distance calculation to specify uncertainty per-element, the minimum difference between two values prior to exponentiation. Specifying null as a deviation is equivalent to setting each deviation to 0, unless distance_transform is \"surprisal\" or \"surprisal_to_prob\", in which case it will attempt to infer a deviation. Each deviation for each feature can be a single value or a list. If it is a single value, that value is used as the deviation and differences and deviations for null values will automatically computed from the data based on the maximum difference. If a deviation is provided as a list, then the first value is the deviation, the second value is the difference to use when one of the values being compared is null, and the third value is the difference to use when both of the values are null. If the third value is omitted, it will use the second value for both. If both of the null values are omitted, then it will compute the maximum difference and use that for both. For nominal types, the value for each feature can be a numeric deviation, an assoc, or a list. If the value is an assoc it specifies deviation information, where each key of the assoc is the nominal value, and each value of the assoc can be a numeric deviation value, a list, or an assoc, with the list specifying either an assoc followed optionally by the default deviation. This inner assoc, regardless of whether it is in a list, maps the value to each actual value's deviation. The parameter entities_returned specifies either the number of entities to return, or is a list. If entities_returned is a list, the first element of the list specifies the minimum incremental probability or percent of mass that the next largest entity would comprise (e.g., 0.05 would return at most 20 entities if they were all equal in percent of mass), and the other elements are optional. The second element is the minimum number of entities to return, the third element is the maximum number of entities to return, and the fourth indicates the number of additional entities to include after any of the aforementioned thresholds (defaulting to zero). If there is disagreement among the constraints for entities_returned, the constraint yielding the fewest entities will govern the number of entities returned. The optional radius_label parameter represents the label name of the radius of the entity (if the radius is within the distance, the entity is selected). The optional numerical_precision represents one of three values: \"precise\", which computes every distance with high numerical precision, \"fast\", which computes every distance with lower but faster numerical precision, and \"recompute_precise\", which computes distances quickly with lower precision but then recomputes any distance values that will be returned with higher precision. If called last with compute_on_contained_entities, then it returns an assoc of the entity ids with their convictions. A transform will be applied to these distances based on distance_transform. If distance_transform is \"surprisal\" then distances will be calculated as surprisals, and weights will not be applied to the values. If distance_transform is \"surprisal_to_prob\" then distances will be calculated as surprisals and will be transformed back into probabilities for aggregating, and then transformed back to surprisals. If distance_transform is a number or omitted, which will default to 1.0, then it will be used as a parameter for a generalized mean (e.g., -1 yields the harmonic mean) to average the distances, only using entity weights for nonpositive values of distance_transform. If entity_weight_label_name is specified, it will multiply the resulting value for each entity (after distance_weight_exponent, etc. have been applied) by the value in the label of entity_weight_label_name. If output_sorted_list is not specified or is false, then it will return an assoc of entity string id as the key with the distance as the value; if output_sorted_list is true, then it will return a list of lists, where the first list is the entity ids and the second list contains the corresponding distances, where both lists are in sorted order starting with the closest or most important (based on whether distance_weight_exponent is positive or negative respectively). If output_sorted_list is a string, then it will additionally return a list where the values correspond to the values of the labels for each respective entity. If output_sorted_list is a list of strings, then it will additionally return a list of values for each of the label values for each respective entity.", + "description" : "When used as a query argument, computes the case conviction for every case given in case_ids_to_compute with respect to *all* cases in the contained entities set input during a query. If case_ids_to_compute is null or an empty list, case conviction is computed for all cases. feature_labels specifies the names of the features to consider the during computation. The parameter p_value is the generalized norm parameter, where the value of 1 is probability space and Manhattan distance, the default, 2 being Euclidean distance, etc. The weights parameter specifies how to weight the different dimensions. If weights is a list, each value maps to its respective element in the vectors. If weights is null, then it will assume the weights to be 1 / number of features if distance_transform is probability space or surprisal space, or otherwise 1. If weights is an assoc, then the parameter value_names will select the weights from the assoc. If weights is an assoc of assocs, then the parameter weights_selection_feature will select which set of weights to use and redistribute and normalize any probability masses for unused features to features that are used. The parameter distance_types is either a list strings or an assoc of strings indicating the type of distance for each feature. Allowed values are \"nominal_bool\", \"nominal_number\", \"nominal_string\", \"nominal_code\", \"continuous_number\", \"continuous_number_cyclic\", \"continuous_string\", \"continuous_code_no_recursive_matching\", and \"continuous_code\". Nominals evaluate whether the two values are the same and continuous evaluates the difference between the two values. The numeric, string, or code modifier specifies how the difference is measured, and cyclic means it is a difference that wraps around. \nFor attributes, the particular distance_types specifies what particular attributes are expected. For a nominal distance_type, a number indicates the nominal count, whereas null will infer from the values given. Cyclic requires a single value, which is the upper bound of the difference for the cycle range (e.g., if the value is 360, then the supremum difference between two values will be 360, leading 1 and 359 to have a difference of 2).\n Deviations are used during distance calculation to specify uncertainty per-element, the minimum difference between two values prior to exponentiation. Specifying null as a deviation is equivalent to setting each deviation to 0, unless distance_transform is \"surprisal\" or \"surprisal_to_prob\", in which case it will attempt to infer a deviation. Each deviation for each feature can be a single value or a list. If it is a single value, that value is used as the deviation and differences and deviations for null values will automatically computed from the data based on the maximum difference. If a deviation is provided as a list, then the first value is the deviation, the second value is the difference to use when one of the values being compared is null, and the third value is the difference to use when both of the values are null. If the third value is omitted, it will use the second value for both. If both of the null values are omitted, then it will compute the maximum difference and use that for both. For nominal types, the value for each feature can be a numeric deviation, an assoc, or a list. If the value is an assoc it specifies deviation information, where each key of the assoc is the nominal value, and each value of the assoc can be a numeric deviation value, a list, or an assoc, with the list specifying either an assoc followed optionally by the default deviation. This inner assoc, regardless of whether it is in a list, maps the value to each actual value's deviation. The parameter entities_returned specifies either the number of entities to return, or is a list. If entities_returned is a list, the first element of the list specifies the minimum incremental probability or percent of mass that the next largest entity would comprise (e.g., 0.05 would return at most 20 entities if they were all equal in percent of mass), and the other elements are optional. The second element is the minimum number of entities to return, the third element is the maximum number of entities to return, and the fourth indicates the number of additional entities to include after any of the aforementioned thresholds (defaulting to zero). If there is disagreement among the constraints for entities_returned, the constraint yielding the fewest entities will govern the number of entities returned. The optional radius_label parameter represents the label name of the radius of the entity (if the radius is within the distance, the entity is selected). The optional numerical_precision represents one of three values: \"precise\", which computes every distance with high numerical precision, \"fast\", which computes every distance with lower but faster numerical precision, and \"recompute_precise\", which computes distances quickly with lower precision but then recomputes any distance values that will be returned with higher precision. If called last with compute_on_contained_entities, then it returns an assoc of the entity ids with their convictions. A transform will be applied to these distances based on distance_transform. If distance_transform is \"surprisal\" then distances will be calculated as surprisals, and weights will not be applied to the values. If distance_transform is \"surprisal_to_prob\" then distances will be calculated as surprisals and will be transformed back into probabilities for aggregating, and then transformed back to surprisals. If distance_transform is a number or omitted, which will default to 1.0, then it will be used as a parameter for a generalized mean (e.g., -1 yields the harmonic mean) to average the distances, only using entity weights for nonpositive values of distance_transform. If entity_weight_label_name is specified, it will multiply the resulting value for each entity (after distance_weight_exponent, etc. have been applied) by the value in the label of entity_weight_label_name. If output_sorted_list is not specified or is false, then it will return an assoc of entity string id as the key with the distance as the value; if output_sorted_list is true, then it will return a list of lists, where the first list is the entity ids and the second list contains the corresponding distances, where both lists are in sorted order starting with the closest or most important (based on whether distance_weight_exponent is positive or negative respectively). If output_sorted_list is a string, then it will additionally return a list where the values correspond to the values of the labels for each respective entity. If output_sorted_list is a list of strings, then it will additionally return a list of values for each of the label values for each respective entity.", "example" : "(compute_on_contained_entities (list (query_entity_distance_contributions 2 (list \"x\" \"y\") (null) 2 (null) (null) (null) (null) (null) -1 (null) \"random seed 1234\") ))" }, @@ -1690,7 +1690,7 @@ var data = [ "output" : "query", "new value" : "partial", "concurrency" : true, - "description" : "When used as a query argument, computes the case conviction for every case given in case_ids_to_compute with respect to *all* cases in the contained entities set input during a query. If case_ids_to_compute is null or an empty list, case conviction is computed for all cases. feature_labels specifies the names of the features to consider the during computation. The parameter p_value is the generalized norm parameter, where the value of 1 is probability space and Manhattan distance, the default, 2 being Euclidean distance, etc. The weights parameter specifies how to weight the different dimensions. If weights is a list, each value maps to its respective element in the vectors. If weights is null, then it will assume that the weights are 1. If weights is an assoc, then the parameter value_names will select the weights from the assoc. If weights is an assoc of assocs, then the parameter weights_selection_feature will select which set of weights to use and redistribute and normalize any probability masses for unused features to features that are used. The parameter distance_types is either a list strings or an assoc of strings indicating the type of distance for each feature. Allowed values are \"nominal_bool\", \"nominal_number\", \"nominal_string\", \"nominal_code\", \"continuous_number\", \"continuous_number_cyclic\", \"continuous_string\", \"continuous_code_no_recursive_matching\", and \"continuous_code\". Nominals evaluate whether the two values are the same and continuous evaluates the difference between the two values. The numeric, string, or code modifier specifies how the difference is measured, and cyclic means it is a difference that wraps around. \nFor attributes, the particular distance_types specifies what particular attributes are expected. For a nominal distance_type, a number indicates the nominal count, whereas null will infer from the values given. Cyclic requires a single value, which is the upper bound of the difference for the cycle range (e.g., if the value is 360, then the supremum difference between two values will be 360, leading 1 and 359 to have a difference of 2).\n Deviations are used during distance calculation to specify uncertainty per-element, the minimum difference between two values prior to exponentiation. Specifying null as a deviation is equivalent to setting each deviation to 0, unless distance_transform is \"surprisal\" or \"surprisal_to_prob\", in which case it will attempt to infer a deviation. Each deviation for each feature can be a single value or a list. If it is a single value, that value is used as the deviation and differences and deviations for null values will automatically computed from the data based on the maximum difference. If a deviation is provided as a list, then the first value is the deviation, the second value is the difference to use when one of the values being compared is null, and the third value is the difference to use when both of the values are null. If the third value is omitted, it will use the second value for both. If both of the null values are omitted, then it will compute the maximum difference and use that for both. For nominal types, the value for each feature can be a numeric deviation, an assoc, or a list. If the value is an assoc it specifies deviation information, where each key of the assoc is the nominal value, and each value of the assoc can be a numeric deviation value, a list, or an assoc, with the list specifying either an assoc followed optionally by the default deviation. This inner assoc, regardless of whether it is in a list, maps the value to each actual value's deviation. The parameter entities_returned specifies either the number of entities to return, or is a list. If entities_returned is a list, the first element of the list specifies the minimum incremental probability or percent of mass that the next largest entity would comprise (e.g., 0.05 would return at most 20 entities if they were all equal in percent of mass), and the other elements are optional. The second element is the minimum number of entities to return, the third element is the maximum number of entities to return, and the fourth indicates the number of additional entities to include after any of the aforementioned thresholds (defaulting to zero). If there is disagreement among the constraints for entities_returned, the constraint yielding the fewest entities will govern the number of entities returned. The optional radius_label parameter represents the label name of the radius of the entity (if the radius is within the distance, the entity is selected). The optional numerical_precision represents one of three values: \"precise\", which computes every distance with high numerical precision, \"fast\", which computes every distance with lower but faster numerical precision, and \"recompute_precise\", which computes distances quickly with lower precision but then recomputes any distance values that will be returned with higher precision. If called last with compute_on_contained_entities, then it returns an assoc of the entity ids with their convictions. A transform will be applied to these distances based on distance_transform. If distance_transform is \"surprisal\" then distances will be calculated as surprisals, and weights will not be applied to the values. If distance_transform is \"surprisal_to_prob\" then distances will be calculated as surprisals and will be transformed back into probabilities for aggregating, and then transformed back to surprisals. If distance_transform is a number or omitted, which will default to 1.0, then it will be used as a parameter for a generalized mean (e.g., -1 yields the harmonic mean) to average the distances, only using entity weights for nonpositive values of distance_transform. If entity_weight_label_name is specified, it will multiply the resulting value for each entity (after distance_weight_exponent, etc. have been applied) by the value in the label of entity_weight_label_name. If conviction_of_removal is true, then it will compute the conviction as if the entities specified by entity_ids_to_compute were removed; if false (the default), then will compute the conviction as if those entities were added or included. If output_sorted_list is not specified or is false, then it will return an assoc of entity string id as the key with the distance as the value; if output_sorted_list is true, then it will return a list of lists, where the first list is the entity ids and the second list contains the corresponding distances, where both lists are in sorted order starting with the closest or most important (based on whether distance_weight_exponent is positive or negative respectively). If output_sorted_list is a string, then it will additionally return a list where the values correspond to the values of the labels for each respective entity. If output_sorted_list is a list of strings, then it will additionally return a list of values for each of the label values for each respective entity.", + "description" : "When used as a query argument, computes the case conviction for every case given in case_ids_to_compute with respect to *all* cases in the contained entities set input during a query. If case_ids_to_compute is null or an empty list, case conviction is computed for all cases. feature_labels specifies the names of the features to consider the during computation. The parameter p_value is the generalized norm parameter, where the value of 1 is probability space and Manhattan distance, the default, 2 being Euclidean distance, etc. The weights parameter specifies how to weight the different dimensions. If weights is a list, each value maps to its respective element in the vectors. If weights is null, then it will assume the weights to be 1 / number of features if distance_transform is probability space or surprisal space, or otherwise 1. If weights is an assoc, then the parameter value_names will select the weights from the assoc. If weights is an assoc of assocs, then the parameter weights_selection_feature will select which set of weights to use and redistribute and normalize any probability masses for unused features to features that are used. The parameter distance_types is either a list strings or an assoc of strings indicating the type of distance for each feature. Allowed values are \"nominal_bool\", \"nominal_number\", \"nominal_string\", \"nominal_code\", \"continuous_number\", \"continuous_number_cyclic\", \"continuous_string\", \"continuous_code_no_recursive_matching\", and \"continuous_code\". Nominals evaluate whether the two values are the same and continuous evaluates the difference between the two values. The numeric, string, or code modifier specifies how the difference is measured, and cyclic means it is a difference that wraps around. \nFor attributes, the particular distance_types specifies what particular attributes are expected. For a nominal distance_type, a number indicates the nominal count, whereas null will infer from the values given. Cyclic requires a single value, which is the upper bound of the difference for the cycle range (e.g., if the value is 360, then the supremum difference between two values will be 360, leading 1 and 359 to have a difference of 2).\n Deviations are used during distance calculation to specify uncertainty per-element, the minimum difference between two values prior to exponentiation. Specifying null as a deviation is equivalent to setting each deviation to 0, unless distance_transform is \"surprisal\" or \"surprisal_to_prob\", in which case it will attempt to infer a deviation. Each deviation for each feature can be a single value or a list. If it is a single value, that value is used as the deviation and differences and deviations for null values will automatically computed from the data based on the maximum difference. If a deviation is provided as a list, then the first value is the deviation, the second value is the difference to use when one of the values being compared is null, and the third value is the difference to use when both of the values are null. If the third value is omitted, it will use the second value for both. If both of the null values are omitted, then it will compute the maximum difference and use that for both. For nominal types, the value for each feature can be a numeric deviation, an assoc, or a list. If the value is an assoc it specifies deviation information, where each key of the assoc is the nominal value, and each value of the assoc can be a numeric deviation value, a list, or an assoc, with the list specifying either an assoc followed optionally by the default deviation. This inner assoc, regardless of whether it is in a list, maps the value to each actual value's deviation. The parameter entities_returned specifies either the number of entities to return, or is a list. If entities_returned is a list, the first element of the list specifies the minimum incremental probability or percent of mass that the next largest entity would comprise (e.g., 0.05 would return at most 20 entities if they were all equal in percent of mass), and the other elements are optional. The second element is the minimum number of entities to return, the third element is the maximum number of entities to return, and the fourth indicates the number of additional entities to include after any of the aforementioned thresholds (defaulting to zero). If there is disagreement among the constraints for entities_returned, the constraint yielding the fewest entities will govern the number of entities returned. The optional radius_label parameter represents the label name of the radius of the entity (if the radius is within the distance, the entity is selected). The optional numerical_precision represents one of three values: \"precise\", which computes every distance with high numerical precision, \"fast\", which computes every distance with lower but faster numerical precision, and \"recompute_precise\", which computes distances quickly with lower precision but then recomputes any distance values that will be returned with higher precision. If called last with compute_on_contained_entities, then it returns an assoc of the entity ids with their convictions. A transform will be applied to these distances based on distance_transform. If distance_transform is \"surprisal\" then distances will be calculated as surprisals, and weights will not be applied to the values. If distance_transform is \"surprisal_to_prob\" then distances will be calculated as surprisals and will be transformed back into probabilities for aggregating, and then transformed back to surprisals. If distance_transform is a number or omitted, which will default to 1.0, then it will be used as a parameter for a generalized mean (e.g., -1 yields the harmonic mean) to average the distances, only using entity weights for nonpositive values of distance_transform. If entity_weight_label_name is specified, it will multiply the resulting value for each entity (after distance_weight_exponent, etc. have been applied) by the value in the label of entity_weight_label_name. If conviction_of_removal is true, then it will compute the conviction as if the entities specified by entity_ids_to_compute were removed; if false (the default), then will compute the conviction as if those entities were added or included. If output_sorted_list is not specified or is false, then it will return an assoc of entity string id as the key with the distance as the value; if output_sorted_list is true, then it will return a list of lists, where the first list is the entity ids and the second list contains the corresponding distances, where both lists are in sorted order starting with the closest or most important (based on whether distance_weight_exponent is positive or negative respectively). If output_sorted_list is a string, then it will additionally return a list where the values correspond to the values of the labels for each respective entity. If output_sorted_list is a list of strings, then it will additionally return a list of values for each of the label values for each respective entity.", "example" : "(compute_on_contained_entities (list (query_entity_kl_divergences 2 (list \"x\" \"y\") (null) 2 (null) (null) (null) (null) (null) -1 (null) \"random seed 1234\")))" }, @@ -1699,7 +1699,7 @@ var data = [ "output" : "query", "new value" : "partial", "concurrency" : true, - "description" : "When used as a query argument, computes the nearest neighbors to every entity given by entity_ids_to_compute, normalizes their influence weights, and accumulates the entity's total influence weights relative to every other case. It returns a list of all cases whose cumulative neighbor values are greater than zero. feature_labels specifies the names of the features to consider the during computation. The parameter p_value is the generalized norm parameter, where the value of 1 is probability space and Manhattan distance, the default, 2 being Euclidean distance, etc. The weights parameter specifies how to weight the different dimensions. If weights is a list, each value maps to its respective element in the vectors. If weights is null, then it will assume that the weights are 1. If weights is an assoc, then the parameter value_names will select the weights from the assoc. If weights is an assoc of assocs, then the parameter weights_selection_feature will select which set of weights to use and redistribute and normalize any probability masses for unused features to features that are used. The parameter distance_types is either a list strings or an assoc of strings indicating the type of distance for each feature. Allowed values are \"nominal_bool\", \"nominal_number\", \"nominal_string\", \"nominal_code\", \"continuous_number\", \"continuous_number_cyclic\", \"continuous_string\", \"continuous_code_no_recursive_matching\", and \"continuous_code\". Nominals evaluate whether the two values are the same and continuous evaluates the difference between the two values. The numeric, string, or code modifier specifies how the difference is measured, and cyclic means it is a difference that wraps around. \nFor attributes, the particular distance_types specifies what particular attributes are expected. For a nominal distance_type, a number indicates the nominal count, whereas null will infer from the values given. Cyclic requires a single value, which is the upper bound of the difference for the cycle range (e.g., if the value is 360, then the supremum difference between two values will be 360, leading 1 and 359 to have a difference of 2).\n Deviations are used during distance calculation to specify uncertainty per-element, the minimum difference between two values prior to exponentiation. Specifying null as a deviation is equivalent to setting each deviation to 0, unless distance_transform is \"surprisal\" or \"surprisal_to_prob\", in which case it will attempt to infer a deviation. Each deviation for each feature can be a single value or a list. If it is a single value, that value is used as the deviation and differences and deviations for null values will automatically computed from the data based on the maximum difference. If a deviation is provided as a list, then the first value is the deviation, the second value is the difference to use when one of the values being compared is null, and the third value is the difference to use when both of the values are null. If the third value is omitted, it will use the second value for both. If both of the null values are omitted, then it will compute the maximum difference and use that for both. For nominal types, the value for each feature can be a numeric deviation, an assoc, or a list. If the value is an assoc it specifies deviation information, where each key of the assoc is the nominal value, and each value of the assoc can be a numeric deviation value, a list, or an assoc, with the list specifying either an assoc followed optionally by the default deviation. This inner assoc, regardless of whether it is in a list, maps the value to each actual value's deviation. The parameter entities_returned specifies either the number of entities to return, or is a list. If entities_returned is a list, the first element of the list specifies the minimum incremental probability or percent of mass that the next largest entity would comprise (e.g., 0.05 would return at most 20 entities if they were all equal in percent of mass), and the other elements are optional. The second element is the minimum number of entities to return, the third element is the maximum number of entities to return, and the fourth indicates the number of additional entities to include after any of the aforementioned thresholds (defaulting to zero). If there is disagreement among the constraints for entities_returned, the constraint yielding the fewest entities will govern the number of entities returned. The optional radius_label parameter represents the label name of the radius of the entity (if the radius is within the distance, the entity is selected). The optional numerical_precision represents one of three values: \"precise\", which computes every distance with high numerical precision, \"fast\", which computes every distance with lower but faster numerical precision, and \"recompute_precise\", which computes distances quickly with lower precision but then recomputes any distance values that will be returned with higher precision. If called last with compute_on_contained_entities, then it returns an assoc of the entity ids with their convictions. A transform will be applied to these distances based on distance_transform. If distance_transform is \"surprisal\" then distances will be calculated as surprisals, and weights will not be applied to the values. If distance_transform is \"surprisal_to_prob\" then distances will be calculated as surprisals and will be transformed back into probabilities for aggregating, and then transformed back to surprisals. If distance_transform is a number or omitted, which will default to 1.0, then it will be used as a parameter for a generalized mean (e.g., -1 yields the harmonic mean) to average the distances, only using entity weights for nonpositive values of distance_transform. If entity_weight_label_name is specified, it will multiply the resulting value for each entity (after distance_weight_exponent, etc. have been applied) by the value in the label of entity_weight_label_name. If output_sorted_list is not specified or is false, then it will return an assoc of entity string id as the key with the distance as the value; if output_sorted_list is true, then it will return a list of lists, where the first list is the entity ids and the second list contains the corresponding distances, where both lists are in sorted order starting with the closest or most important (based on whether distance_weight_exponent is positive or negative respectively). If output_sorted_list is a string, then it will additionally return a list where the values correspond to the values of the labels for each respective entity. If output_sorted_list is a list of strings, then it will additionally return a list of values for each of the label values for each respective entity.", + "description" : "When used as a query argument, computes the nearest neighbors to every entity given by entity_ids_to_compute, normalizes their influence weights, and accumulates the entity's total influence weights relative to every other case. It returns a list of all cases whose cumulative neighbor values are greater than zero. feature_labels specifies the names of the features to consider the during computation. The parameter p_value is the generalized norm parameter, where the value of 1 is probability space and Manhattan distance, the default, 2 being Euclidean distance, etc. The weights parameter specifies how to weight the different dimensions. If weights is a list, each value maps to its respective element in the vectors. If weights is null, then it will assume the weights to be 1 / number of features if distance_transform is probability space or surprisal space, or otherwise 1. If weights is an assoc, then the parameter value_names will select the weights from the assoc. If weights is an assoc of assocs, then the parameter weights_selection_feature will select which set of weights to use and redistribute and normalize any probability masses for unused features to features that are used. The parameter distance_types is either a list strings or an assoc of strings indicating the type of distance for each feature. Allowed values are \"nominal_bool\", \"nominal_number\", \"nominal_string\", \"nominal_code\", \"continuous_number\", \"continuous_number_cyclic\", \"continuous_string\", \"continuous_code_no_recursive_matching\", and \"continuous_code\". Nominals evaluate whether the two values are the same and continuous evaluates the difference between the two values. The numeric, string, or code modifier specifies how the difference is measured, and cyclic means it is a difference that wraps around. \nFor attributes, the particular distance_types specifies what particular attributes are expected. For a nominal distance_type, a number indicates the nominal count, whereas null will infer from the values given. Cyclic requires a single value, which is the upper bound of the difference for the cycle range (e.g., if the value is 360, then the supremum difference between two values will be 360, leading 1 and 359 to have a difference of 2).\n Deviations are used during distance calculation to specify uncertainty per-element, the minimum difference between two values prior to exponentiation. Specifying null as a deviation is equivalent to setting each deviation to 0, unless distance_transform is \"surprisal\" or \"surprisal_to_prob\", in which case it will attempt to infer a deviation. Each deviation for each feature can be a single value or a list. If it is a single value, that value is used as the deviation and differences and deviations for null values will automatically computed from the data based on the maximum difference. If a deviation is provided as a list, then the first value is the deviation, the second value is the difference to use when one of the values being compared is null, and the third value is the difference to use when both of the values are null. If the third value is omitted, it will use the second value for both. If both of the null values are omitted, then it will compute the maximum difference and use that for both. For nominal types, the value for each feature can be a numeric deviation, an assoc, or a list. If the value is an assoc it specifies deviation information, where each key of the assoc is the nominal value, and each value of the assoc can be a numeric deviation value, a list, or an assoc, with the list specifying either an assoc followed optionally by the default deviation. This inner assoc, regardless of whether it is in a list, maps the value to each actual value's deviation. The parameter entities_returned specifies either the number of entities to return, or is a list. If entities_returned is a list, the first element of the list specifies the minimum incremental probability or percent of mass that the next largest entity would comprise (e.g., 0.05 would return at most 20 entities if they were all equal in percent of mass), and the other elements are optional. The second element is the minimum number of entities to return, the third element is the maximum number of entities to return, and the fourth indicates the number of additional entities to include after any of the aforementioned thresholds (defaulting to zero). If there is disagreement among the constraints for entities_returned, the constraint yielding the fewest entities will govern the number of entities returned. The optional radius_label parameter represents the label name of the radius of the entity (if the radius is within the distance, the entity is selected). The optional numerical_precision represents one of three values: \"precise\", which computes every distance with high numerical precision, \"fast\", which computes every distance with lower but faster numerical precision, and \"recompute_precise\", which computes distances quickly with lower precision but then recomputes any distance values that will be returned with higher precision. If called last with compute_on_contained_entities, then it returns an assoc of the entity ids with their convictions. A transform will be applied to these distances based on distance_transform. If distance_transform is \"surprisal\" then distances will be calculated as surprisals, and weights will not be applied to the values. If distance_transform is \"surprisal_to_prob\" then distances will be calculated as surprisals and will be transformed back into probabilities for aggregating, and then transformed back to surprisals. If distance_transform is a number or omitted, which will default to 1.0, then it will be used as a parameter for a generalized mean (e.g., -1 yields the harmonic mean) to average the distances, only using entity weights for nonpositive values of distance_transform. If entity_weight_label_name is specified, it will multiply the resulting value for each entity (after distance_weight_exponent, etc. have been applied) by the value in the label of entity_weight_label_name. If output_sorted_list is not specified or is false, then it will return an assoc of entity string id as the key with the distance as the value; if output_sorted_list is true, then it will return a list of lists, where the first list is the entity ids and the second list contains the corresponding distances, where both lists are in sorted order starting with the closest or most important (based on whether distance_weight_exponent is positive or negative respectively). If output_sorted_list is a string, then it will additionally return a list where the values correspond to the values of the labels for each respective entity. If output_sorted_list is a list of strings, then it will additionally return a list of values for each of the label values for each respective entity.", "example" : "(compute_on_contained_entities (list (query_entity_cumulative_nearest_entity_weights 2 (list \"x\" \"y\") (null) 2 (null) (null) (null) (null) (null) -1 (null) \"random seed 1234\") ))" }, diff --git a/src/Amalgam/entity/EntityQueryBuilder.h b/src/Amalgam/entity/EntityQueryBuilder.h index fac7e80d..fc00bfeb 100644 --- a/src/Amalgam/entity/EntityQueryBuilder.h +++ b/src/Amalgam/entity/EntityQueryBuilder.h @@ -146,7 +146,7 @@ namespace EntityQueryBuilder { feature_attribs.deviation = std::numeric_limits::quiet_NaN(); - if(deviation_node == nullptr) + if(EvaluableNode::IsNull(deviation_node)) return; auto dnt = deviation_node->GetType(); @@ -326,14 +326,18 @@ namespace EntityQueryBuilder else { //get weights + double default_weight = 1.0; + if(dist_eval.computeSurprisal) + default_weight = 1.0 / dist_eval.featureAttribs.size(); + EvaluableNode::ConvertChildNodesAndStoreValue(weights_node, element_names, num_elements, - [&dist_eval](size_t i, bool found, EvaluableNode *en) { + [&dist_eval, default_weight](size_t i, bool found, EvaluableNode *en) { if(i < dist_eval.featureAttribs.size()) { if(found) - dist_eval.featureAttribs[i].weight = EvaluableNode::ToNumber(en, 1.0); + dist_eval.featureAttribs[i].weight = EvaluableNode::ToNumber(en, default_weight); else - dist_eval.featureAttribs[i].weight = 1.0; + dist_eval.featureAttribs[i].weight = default_weight; } }); } @@ -658,12 +662,9 @@ namespace EntityQueryBuilder StringInternPool::StringID weights_selection_feature = string_intern_pool.NOT_A_STRING_ID; if(ocn.size() > WEIGHTS_SELECTION_FEATURE) weights_selection_feature = EvaluableNode::ToStringIDIfExists(ocn[WEIGHTS_SELECTION_FEATURE]); - - PopulateDistanceFeatureParameters(cur_condition->distEvaluator, - cur_condition->positionLabels.size(), cur_condition->positionLabels, - weights_node, weights_selection_feature, distance_types_node, attributes_node, deviations_node); //value transforms for whatever is measured as "distance" + //need to populate distance transform BEFORE populating feature attributes cur_condition->distanceWeightExponent = 1.0; cur_condition->distEvaluator.computeSurprisal = false; cur_condition->distEvaluator.transformSurprisalToProb = false; @@ -690,6 +691,10 @@ namespace EntityQueryBuilder } } + PopulateDistanceFeatureParameters(cur_condition->distEvaluator, + cur_condition->positionLabels.size(), cur_condition->positionLabels, + weights_node, weights_selection_feature, distance_types_node, attributes_node, deviations_node); + cur_condition->weightLabel = StringInternPool::NOT_A_STRING_ID; if(ocn.size() > ENTITY_WEIGHT_LABEL_NAME) cur_condition->weightLabel = EvaluableNode::ToStringIDIfExists(ocn[ENTITY_WEIGHT_LABEL_NAME]); diff --git a/src/Amalgam/evaluablenode/EvaluableNode.h b/src/Amalgam/evaluablenode/EvaluableNode.h index fc3eb193..1ecf7473 100644 --- a/src/Amalgam/evaluablenode/EvaluableNode.h +++ b/src/Amalgam/evaluablenode/EvaluableNode.h @@ -825,7 +825,7 @@ class EvaluableNode static inline void ConvertChildNodesAndStoreValue(EvaluableNode *node, std::vector &element_names, size_t num_expected_elements, StoreValueFunction store_value) { - if(node == nullptr || node->IsImmediate()) + if(EvaluableNode::IsNull(node) || node->IsImmediate()) { //fill in with the node's value for(size_t i = 0; i < num_expected_elements; i++)