Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
6fa62d4
19790: Added ablation to time series training
howsoRes Oct 25, 2024
b208836
more dev
howsoRes Oct 25, 2024
5679697
WIP
howsoRes Oct 28, 2024
5f21516
fixes
howsoRes Oct 28, 2024
76dcc47
fixes
howsoRes Oct 28, 2024
7365eaa
more fixes
howsoRes Oct 29, 2024
c0dca8a
fixes
howsoRes Oct 29, 2024
717bfc3
don't ablate first and last cases in a series
howsoRes Oct 30, 2024
c3cdba3
fixes ablation indices
howsoRes Oct 30, 2024
4e31e7e
added TODO
howsoRes Oct 30, 2024
05745e0
tweaks and todos
howsoRes Oct 30, 2024
bfa279d
Merge branch 'main' into 19790-train-ts-ablation
howsoRes Oct 31, 2024
af566a6
fix ordering of ablated indices
howsoRes Nov 1, 2024
2fb019c
22095: Removes uses of query_count
howsohazard Nov 3, 2024
f510e34
accumulate weights for ablation regardless of autoanalyze
howsoRes Nov 4, 2024
bca3884
fix formatting
howsoRes Nov 4, 2024
670aa23
version dependency bump
howsoRes Nov 4, 2024
c0923b2
Merge branch '22095-query-count-removal' into 19790-train-ts-ablation
howsoRes Nov 4, 2024
76a135b
fix multisort
howsoRes Nov 5, 2024
76ae9ce
fix multisort
howsoRes Nov 5, 2024
77cbdf1
Merge branch 'main' into 19790-train-ts-ablation
howsoRes Nov 5, 2024
e742ffe
remove todo
howsoRes Nov 5, 2024
01b954f
22125: Adds logic to consider distance to most similar cases during …
howsoRes Nov 5, 2024
ff132dd
refactor per pr comments
howsoRes Nov 6, 2024
d5f0027
Merge branch 'main' into 22125-ablation-distance-comparison
howsoRes Nov 6, 2024
a6848db
Merge branch 'main' into 19790-train-ts-ablation
howsoRes Nov 7, 2024
ec73e2f
Merge branch '22125-ablation-distance-comparison' into 19790-train-ts…
howsoRes Nov 7, 2024
d254c7b
ablation fixes
howsoRes Nov 7, 2024
81213e9
Merge branch 'main' into 19790-train-ts-ablation
howsoRes Nov 7, 2024
1e90bcf
fix double encoding issue
howsoRes Nov 7, 2024
9975a22
fix encoding on ablation check
howsoRes Nov 7, 2024
3484610
Merge branch 'main' into 19790-train-ts-ablation
howsoRes Nov 7, 2024
1796706
make the ablation test stable
howsoRes Nov 8, 2024
9a22c31
whitespace fix
howsoRes Nov 8, 2024
e7f8078
docstring and ut fix
howsoRes Nov 8, 2024
697205f
Merge branch 'main' into 19790-train-ts-ablation
howsohazard Nov 8, 2024
155b60c
comments
howsoRes Nov 8, 2024
64f8bc0
Merge branch '19790-train-ts-ablation' of github.com:howsoai/howso-en…
howsoRes Nov 8, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions howso.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,7 @@
"synthesis_utilities"
"synthesis_validation"
"train"
"train_ts_ablation"
"typing"
"update_cases"
"upgrade"
Expand Down
253 changes: 143 additions & 110 deletions howso/train.amlg
Original file line number Diff line number Diff line change
Expand Up @@ -433,7 +433,11 @@
)

;auto populate derived features if necessary
(if (> (size derived_features) 0)
(if (and
(> (size derived_features) 0)
;derive here only for non-time series data, or when ablation was skipped and thus the features were not derived yet
(or skip_ablation (= (null) !tsTimeFeature) )
)
(call !DeriveTrainFeatures (assoc
features features
;keep and derive only those features that are not in the features list
Expand Down Expand Up @@ -677,137 +681,145 @@
cases
)

;else ablating cases during training
(let
(assoc
train_features
;if auto ablate is enabled, populate the weight feature for this case
(if !autoAblationEnabled
(append features (list !autoAblationWeightFeature))
features
)
)
;time feature exists, do ablation on time series by deriving one series at a time and then ablating those cases prior to training
!tsTimeFeature
(call !TrainTimeSeriesAblation)

;if user explicitly specified to skip autoanalyze, send back "analyze" status so user knows an analyze is needed
(if skip_auto_analyze
(assign (assoc status_output "analyze"))
)
;else ablating cases during training
(call !TrainCasesWithAblation)
)

(declare (assoc
batch_data_mass_threshold 0
batch_data_mass 0
batch_size 0
input_case_index 0
output_case_ids []

;threshold-related variables
thresholds_enabled
(or
(size !autoAblationAbsThresholdMap)
(size !autoAblationDeltaThresholdMap)
(size !autoAblationRelThresholdMap)
)
prev_prediction_stats_map {}
new_prediction_stats_map {}
thresholds_satisfied (false)
))
#!TrainCasesWithAblation
(let
(assoc
train_features
;if auto ablate is enabled, populate the weight feature for this case
(if !autoAblationEnabled
(append features (list !autoAblationWeightFeature))
features
)
)

;split by batches of cases until next analyze
(while (< input_case_index (size cases))
;if user explicitly specified to skip autoanalyze, send back "analyze" status so user knows an analyze is needed
(if skip_auto_analyze
(assign (assoc status_output "analyze"))
)

(if (and thresholds_enabled (not skip_ablation))
(seq
(assign (assoc
prev_prediction_stats_map new_prediction_stats_map
))
(assign (assoc
new_prediction_stats_map
(get
(call !CalculateFeatureResiduals (assoc
features features
weight_feature !autoAblationWeightFeature
use_case_weights (true)
compute_all_statistics (true)
store_values (false)
))
"prediction_stats"
)
))
(assign (assoc
thresholds_satisfied
(apply "or"
(values
(call !CheckThresholds (assoc
abs_threshold_map !autoAblationAbsThresholdMap
delta_threshold_map !autoAblationDeltaThresholdMap
rel_threshold_map !autoAblationRelThresholdMap
prev_prediction_stats_map prev_prediction_stats_map
new_prediction_stats_map new_prediction_stats_map
))
)
)
))
)
(declare (assoc
batch_data_mass_threshold 0
batch_data_mass 0
batch_size 0
input_case_index 0
output_case_ids []

;threshold-related variables
thresholds_enabled
(or
(size !autoAblationAbsThresholdMap)
(size !autoAblationDeltaThresholdMap)
(size !autoAblationRelThresholdMap)
)
prev_prediction_stats_map {}
new_prediction_stats_map {}
thresholds_satisfied (false)
))

(assign (assoc
;always train a few extra cases since some are expected to be ablated
;to prevent this threshold value from dropping down to very small values
;but also limit to how much is trained at a time
batch_data_mass_threshold
(max
10
(min
(+ 10 (- !autoAnalyzeThreshold !dataMassChangeSinceLastAnalyze))
!ablationBatchSize
)
)
batch_data_mass 0
batch_size 0
))
;split by batches of cases until next analyze
(while (< input_case_index (size cases))

(while (and (< batch_data_mass batch_data_mass_threshold) (< (+ input_case_index (current_index)) (size cases)) )
(if (and thresholds_enabled (not skip_ablation))
(seq
(assign (assoc
prev_prediction_stats_map new_prediction_stats_map
))
(assign (assoc
new_prediction_stats_map
(get
(call !CalculateFeatureResiduals (assoc
features features
weight_feature !autoAblationWeightFeature
use_case_weights (true)
compute_all_statistics (true)
store_values (false)
))
"prediction_stats"
)
))
(assign (assoc
batch_data_mass
(+
(or (previous_result 1) 0)
(if accumulate_weight_feature
(or (get cases [(+ input_case_index (current_index 2)) weight_feature_index ]) 1)
1
thresholds_satisfied
(apply "or"
(values
(call !CheckThresholds (assoc
abs_threshold_map !autoAblationAbsThresholdMap
delta_threshold_map !autoAblationDeltaThresholdMap
rel_threshold_map !autoAblationRelThresholdMap
prev_prediction_stats_map prev_prediction_stats_map
new_prediction_stats_map new_prediction_stats_map
))
)
)
batch_size (+ 1 (current_index 1))
))
batch_data_mass
)
)

(assign (assoc
;always train a few extra cases since some are expected to be ablated
;to prevent this threshold value from dropping down to very small values
;but also limit to how much is trained at a time
batch_data_mass_threshold
(max
10
(min
(+ 10 (- !autoAnalyzeThreshold !dataMassChangeSinceLastAnalyze))
!ablationBatchSize
)
)
batch_data_mass 0
batch_size 0
))

(while (and (< batch_data_mass batch_data_mass_threshold) (< (+ input_case_index (current_index)) (size cases)) )
(assign (assoc
output_case_ids
(call !TrainCasesWithAblation (assoc
cases (unzip cases (range input_case_index (+ input_case_index batch_size -1)) )
;ensure that starting training index value is updated for each batch
session_training_index (+ trained_instance_count input_case_index)
))
batch_data_mass
(+
(or (previous_result 1) 0)
(if accumulate_weight_feature
(or (get cases [(+ input_case_index (current_index 2)) weight_feature_index ]) 1)
1
)
)
batch_size (+ 1 (current_index 1))
))
batch_data_mass
)
(assign (assoc
output_case_ids
(call !AblateCases (assoc
cases (unzip cases (range input_case_index (+ input_case_index batch_size -1)) )
;ensure that starting training index value is updated for each batch
session_training_index (+ trained_instance_count input_case_index)
))
))

(if (and run_autoanalyze_check (not skip_auto_analyze))
(call !AutoAnalyzeIfNeeded)
)

(accum (assoc input_case_index batch_size ))
(if (and run_autoanalyze_check (not skip_auto_analyze))
(call !AutoAnalyzeIfNeeded)
)

(if (> (current_index) 0)
(assign (assoc output_case_ids (append (previous_result 1) output_case_ids) ))
)
(accum (assoc input_case_index batch_size ))

output_case_ids
(if (> (current_index) 0)
(assign (assoc output_case_ids (append (previous_result 1) output_case_ids) ))
)

output_case_ids
)

output_case_ids
)


;Helper method to train cases with an ablation check
#!TrainCasesWithAblation
#!AblateCases
(let
(assoc
indices_to_train
Expand All @@ -831,6 +843,22 @@
))
))
)

;time series ablation explicitly keeps the first and last case of a series
(if ts_ablated_indices_map
(if (or
(= (- (size cases) 1) (current_index))
;first case and it's actually the first case of the series
(and
(= 0 (current_index))
;the .series_index feature is second to last
(= 0 (get feature_values (- (size features) 2)) )
)
)
(conclude (true))
)
)

;do not train on this case if it is null or all case values are null or it's within provided thresholds
;if one of the ablation methods returns false, then the case should be ablated.
(and
Expand All @@ -853,7 +881,12 @@
(accum (assoc
ablated_indices_list
(map
(lambda (+ session_training_index (current_value)))
(lambda
(if (size ts_ablated_indices_map)
(+ session_training_index (get ts_ablated_indices_map (current_value)))
(+ session_training_index (current_value))
)
)
(remove (indices cases) indices_to_train)
)
ablation_trained_instance_count (size indices_to_train)
Expand Down Expand Up @@ -926,7 +959,7 @@
)
))

;recompute influence weights entropy
;recompute influence weights entropy; TODO: skip for TS ablation unless it's the last series of a train call
(call !ComputeAndStoreInfluenceWeightEntropies (assoc
features features
weight_feature accumulate_weight_feature
Expand Down
Loading