Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions howso.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@
#!cyclicFeaturesMap (null)
#!numericNominalFeaturesMap (null)
#!editDistanceFeatureTypesMap (null)
#!codeFeatureRecursiveMatchingMap (null)
#!stringNominalFeaturesSet (null)
#!userSpecifiedFeatureErrorsMap (null)
#!averageCaseEntropyAddition (null)
Expand Down Expand Up @@ -466,6 +467,9 @@
;assoc of all string continuous or any json or amalgam features for fast lookup, feature -> data_type
!editDistanceFeatureTypesMap (assoc)

;assoc of code (json/yaml/amalgam) feature -> boolean (whether they are recursive_matching or not)
!codeFeatureRecursiveMatchingMap (assoc)

;assoc of nominal feature names whose values are all unique
!uniqueNominalsSet (assoc)

Expand Down
19 changes: 19 additions & 0 deletions howso/attribute_maps.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -571,6 +571,25 @@
)
)

;Builds an assoc of feature -> boolean stating whether that feature uses recursive matching.
;Only code features (data type of json, yaml or amalgam) can map to .true; every other feature in the input maps to .false
;
;reads:
; code_features_map: assoc of feature -> data_type to iterate over; the returned assoc has the same keys
; feature_attributes: assoc of feature -> attributes, looked up for an explicit 'recursive_matching' value.
;	NOTE(review): feature_attributes is not passed by the caller visible in this change, so it is presumably
;	expected to already be in the calling scope - confirm
;returns: assoc of feature -> boolean
#!ComposeCodeFeatureRecursiveMatchingMap
(map
(lambda
;(current_value) is the data type of the feature being mapped
(if (contains_value ["json" "yaml" "amalgam"] (current_value))
;parameter wasn't specified, default to .false for json/yaml and .true for amalgam
;(current_index 1) is used as the feature name key into feature_attributes
;NOTE(review): the offset of 1 presumably accounts for the enclosing list being constructed - confirm
(if (= (null) (get feature_attributes [(current_index 1) "recursive_matching"]))
(= "amalgam" (current_value))

;else use the explicitly specified value
(get feature_attributes [(current_index 1) "recursive_matching"])
)

;false for any non-code features (string or string_mixable)
.false
)
)
code_features_map
)

#!ComposeNovelSubstitionFeatureSet
;map-filter weave
(apply "assoc"
Expand Down
18 changes: 17 additions & 1 deletion howso/attributes.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@
; 'string_mixable', 'formatted_time' and 'formatted_date_time' are valid only when type is continuous
; 'boolean' is valid only when type is nominal
;
; 'recursive_matching': boolean, only applicable to code features (when 'data_type' is one of json/yaml/amalgam).
; If unspecified, defaults to false for 'json' and 'yaml' features and true for 'amalgam' features.
; When true, operations will work recursively on feature values. When false, will operate on positional
; matches without considering recursion, which will yield better and faster results if the schema of the
; semistructured data is not recursive.
;
; 'id_feature': boolean, Set to true only for nominal features containing nominal IDs to specify that this
; feature should be used to compute case weights for id based privacy. For time series,
; this feature will be used as the id for each time series generation. Default is false
Expand Down Expand Up @@ -110,7 +116,8 @@
;
; 'subtype': string, the type used in novel nominal substitution.
;
; 'original_type': string, original data type details. Used by clients to determine how to serialize and deserialize feature data.
; 'original_type': string, original data type details. Used by clients to determine how to serialize and deserialize feature data.
; Note: 'json' features with original_type of 'tokenizable_string' won't be mutated during generative reacts.
;
; 'original_format': string, original data formats used by clients. Automatically populated by clients
; to store client language specific context about features.
Expand Down Expand Up @@ -210,6 +217,7 @@
ordinal_string_to_ordinal_map (assoc)
ordinal_ordinal_to_string_map (assoc)
non_number_continuous_features_map (assoc)
code_feature_recursive_matching_map (assoc)
numeric_nominal_features_map (assoc)
string_nominal_features_set (assoc)
feature_rounding_map (assoc)
Expand Down Expand Up @@ -301,6 +309,13 @@
)
)

;if there are any non-numeric continuous (edit distance) features, compute the feature -> recursive_matching boolean map;
;the whole non_number_continuous_features_map is passed in as code_features_map, and the called method assigns .false
;to any feature whose type is not json/yaml/amalgam
(if (size non_number_continuous_features_map)
(assign (assoc
code_feature_recursive_matching_map
(call !ComposeCodeFeatureRecursiveMatchingMap (assoc code_features_map non_number_continuous_features_map))
))
)

;check that ordinal features are either numbers or have a valid ordered list of allowed values
(if (size ordinals_map)
(let
Expand Down Expand Up @@ -585,6 +600,7 @@
!ordinalStringToOrdinalMap ordinal_string_to_ordinal_map
!ordinalOrdinalToStringMap ordinal_ordinal_to_string_map
!editDistanceFeatureTypesMap non_number_continuous_features_map
!codeFeatureRecursiveMatchingMap code_feature_recursive_matching_map
!numericNominalFeaturesMap numeric_nominal_features_map
!stringNominalFeaturesSet string_nominal_features_set
!novelSubstitionFeatureSet novel_substition_feature_set
Expand Down
6 changes: 5 additions & 1 deletion howso/contributions.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
num_training_cases (call !GetNumTrainingCases)
edit_distance_action_feature (contains_index !editDistanceFeatureTypesMap action_feature)
is_string_mixable (= "string_mixable" (get !editDistanceFeatureTypesMap action_feature))
is_recursive_matching (get !codeFeatureRecursiveMatchingMap action_feature)
;store an assoc of lag/rate/delta feature -> lag/order amount for time series flows
ts_feature_lag_amount_map (if !tsTimeFeature (call !BuildTSFeatureLagAmountMap))
max_lag_index_value (null)
Expand Down Expand Up @@ -454,15 +455,17 @@
(call !CombineCode (assoc
sources feature_reactions_with
weights weights_with
is_recursive_matching is_recursive_matching
))
mixed_without
(call !CombineCode (assoc
sources feature_reactions_without
weights weights_without
is_recursive_matching is_recursive_matching
))
)
(list
(edit_distance mixed_with mixed_without is_string_mixable)
(edit_distance mixed_with mixed_without is_string_mixable is_recursive_matching)
(difference mixed_with mixed_without)
)
))
Expand Down Expand Up @@ -637,6 +640,7 @@
(get reaction_with (list "action_values" 0))
(get reaction_without (list "action_values" 0))
is_string_mixable
is_recursive_matching
)
;TODO: 17356, deal with averaging out differences
(difference
Expand Down
1 change: 1 addition & 0 deletions howso/details.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -602,6 +602,7 @@
(call !CombineCode (assoc
sources neighbor_feature_values
weights weights
is_recursive_matching (get !codeFeatureRecursiveMatchingMap (current_value 1))
))

))
Expand Down
1 change: 1 addition & 0 deletions howso/influences.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,7 @@
(= "string" (get !editDistanceFeatureTypesMap action_feature))
(= "string_mixable" (get !editDistanceFeatureTypesMap action_feature))
)
(get !codeFeatureRecursiveMatchingMap action_feature)
)

(abs (-
Expand Down
4 changes: 3 additions & 1 deletion howso/react_discriminative.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -706,6 +706,7 @@
(call !CombineCode (assoc
sources candidate_case_values
weights candidate_case_weights
is_recursive_matching (get !codeFeatureRecursiveMatchingMap action_feature)
))

;divide the dot product by the total weight
Expand Down Expand Up @@ -767,6 +768,7 @@
sources (list)
weights (list)
similar_mix_chance 0
is_recursive_matching .false
)

;compute accumed_weights by adding up the total probability mass seen so far
Expand Down Expand Up @@ -807,7 +809,7 @@
frac_b (/ (get weights (current_index 1)) prob_mass)
))

(mix (previous_result) (current_value) frac_a frac_b similar_mix_chance)
(mix (previous_result) (current_value) frac_a frac_b similar_mix_chance is_recursive_matching)
))
sources
)
Expand Down
5 changes: 4 additions & 1 deletion howso/residuals.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -1335,11 +1335,14 @@
;else continuous
(if feature_is_edit_distance
;use string edit distance only if it's a string
(edit_distance case_feature_value interpolated_value
(edit_distance
case_feature_value
interpolated_value
(or
(= "string" (get !editDistanceFeatureTypesMap feature))
(= "string_mixable" (get !editDistanceFeatureTypesMap feature))
)
(get !codeFeatureRecursiveMatchingMap feature)
)

(abs (- case_feature_value interpolated_value))
Expand Down
5 changes: 3 additions & 2 deletions howso/synthesis_utilities.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -905,6 +905,7 @@
case_index 1
original_regional_feature_values_map (assoc)
is_string_mixable .false
is_recursive_matching (get !codeFeatureRecursiveMatchingMap feature)
)

;explode all the strings to treat them as lists
Expand Down Expand Up @@ -978,7 +979,7 @@
(declare (assoc
regional_edit_distances_map
(map
(lambda (edit_distance (current_value) intersected_regional_value is_string_mixable))
(lambda (edit_distance (current_value) intersected_regional_value is_string_mixable is_recursive_matching))
regional_feature_values_map
)
))
Expand All @@ -996,7 +997,7 @@
(assoc
local_edit_distances
(map
(lambda (edit_distance (current_value) intersected_local_value is_string_mixable))
(lambda (edit_distance (current_value) intersected_local_value is_string_mixable is_recursive_matching))
(unzip regional_feature_values_map local_case_ids)
)
)
Expand Down
12 changes: 12 additions & 0 deletions howso/types.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,18 @@
description "Specifies the observational mean absolute error for this feature. Use when the error value is already known."
)
data_type {ref "FeatureDataType"}
;schema for the optional 'recursive_matching' feature attribute, a boolean that only applies to code features
recursive_matching
(assoc
type "boolean"
description
(concat
"Only applicable to code features (when 'data_type' is one of json/yaml/amalgam).\n"
"If unspecified, defaults to false for 'json' and 'yaml' features and true for 'amalgam' features.\n"
"When true, operations will work recursively on feature values. When false, will operate on positional\n"
"matches without considering recursion, which will yield better and faster results if the schema of the\n"
;the final fragment must end with exactly one closing quote, an extra one would start a new unterminated string
"semistructured data is not recursive."
)
)
id_feature
(assoc
type "boolean"
Expand Down
12 changes: 12 additions & 0 deletions unit_tests/ut_h_edit_dist_features.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,18 @@
)
))

;verify the stored recursive matching map: retrieve the !codeFeatureRecursiveMatchingMap label from the
;"howso" entity via debug_label and compare it against the expected feature -> boolean assoc.
;Only the 'amalgam' entry is expected to be .true; 'json', 'yaml' and 'string' are expected to be .false
;NOTE(review): the keys here are feature names, presumably named after each feature's data type with no
;explicit 'recursive_matching' attribute set - confirm against the feature attributes set earlier in this test
(print "Set 'recursive_matching' flag correctly for each feature: ")
(call assert_same (assoc
obs (call_entity "howso" "debug_label" (assoc label "!codeFeatureRecursiveMatchingMap"))
exp
{
amalgam .true
json .false
string .false
yaml .false
}
))

(declare
(assoc
result (null)
Expand Down
2 changes: 1 addition & 1 deletion version.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"version": "0.0.0",
"dependencies": {
"amalgam": "70.2.0"
"amalgam": "71.0.0"
}
}
Loading