@@ -72,34 +72,45 @@ def __init__(self, config: Dict[str, Any], num_categories_per_col: np.ndarray, n
72
72
73
73
self .ee_layers = self ._create_ee_layers ()
74
74
75
- def get_partial_models (self , subset_features : List [int ]) -> "_LearnedEntityEmbedding" :
75
+ def get_partial_models (self ,
76
+ n_excl_embed_features : int ,
77
+ idx_embed_feat_partial : List [int ]) -> "_LearnedEntityEmbedding" :
76
78
"""
77
79
Extract a partial model that only works on a subset of the data that ought to be passed to the embedding
78
80
network. This function is implemented for time series forecasting tasks where the known future features are only
79
81
a subset of the past features
80
82
Args:
81
- subset_features (List[int]):
82
- a set of index identifying which features will pass through the partial model
83
+ n_excl_embed_features (int):
84
+ number of unembedded features
85
+ idx_embed_feat_partial (List[int]):
86
+ a set of indices identifying which embedding features will be inherited by the partial model
83
87
84
88
Returns:
85
89
partial_model (_LearnedEntityEmbedding)
86
90
a new partial model
87
91
"""
88
- num_input_features = self .num_categories_per_col [subset_features ]
89
- num_features_excl_embed = sum ([sf < self .num_features_excl_embed for sf in subset_features ])
92
+ n_partial_features = n_excl_embed_features + len (idx_embed_feat_partial )
90
93
91
- num_output_dimensions = [ self . num_output_dimensions [ sf ] for sf in subset_features ]
92
- embed_features = [self . embed_features [ sf ] for sf in subset_features ]
94
+ num_categories_per_col = np . zeros ( n_partial_features , dtype = np . int16 )
95
+ num_output_dimensions = [1 ] * n_partial_features
93
96
94
97
ee_layers = []
95
- ee_layer_tracker = 0
96
- for sf in subset_features :
97
- if self .embed_features [sf ]:
98
- ee_layers .append (self .ee_layers [ee_layer_tracker ])
99
- ee_layer_tracker += 1
98
+ for idx , idx_embed in enumerate (idx_embed_feat_partial ):
99
+ idx_raw = self .num_features_excl_embed + idx_embed
100
+ n_embed = self .num_categories_per_col [idx_raw ]
101
+ n_output = self .num_output_dimensions [idx_raw ]
102
+
103
+ idx_new = n_excl_embed_features + idx
104
+ num_categories_per_col [idx_new ] = n_embed
105
+ num_output_dimensions [idx_new ] = n_output
106
+
107
+ ee_layers .append (self .ee_layers [idx_embed ])
108
+
100
109
ee_layers = nn .ModuleList (ee_layers )
101
110
102
- return PartialLearnedEntityEmbedding (num_input_features , num_features_excl_embed , embed_features ,
111
+ embed_features = num_categories_per_col > 0
112
+
113
+ return PartialLearnedEntityEmbedding (num_categories_per_col , n_excl_embed_features , embed_features ,
103
114
num_output_dimensions , ee_layers )
104
115
105
116
def forward (self , x : torch .Tensor ) -> torch .Tensor :
@@ -108,10 +119,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
108
119
concat_seq = []
109
120
110
121
layer_pointer = 0
111
- # Time series tasks need to add targets to the embeddings. However, the target information is not recorded
112
- # by autoPyTorch's embeddings. Therefore, we need to add the targets parts to `concat_seq` manually, which is
113
- # the last few dimensions of the input x
114
- # we assign x_pointer to 0 beforehand to avoid the case that self.embed_features has 0 length
122
+ # Our embedding network is only applied to the last few feature columns, as flagged by self.embed_features
115
123
x_pointer = 0
116
124
for x_pointer , embed in enumerate (self .embed_features ):
117
125
if not embed :
@@ -121,9 +129,8 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
121
129
current_feature_slice = x [..., x_pointer ]
122
130
current_feature_slice = current_feature_slice .to (torch .int )
123
131
concat_seq .append (self .ee_layers [layer_pointer ](current_feature_slice ))
124
- layer_pointer += 1
125
132
126
- concat_seq . append ( x [..., x_pointer :])
133
+ layer_pointer += 1
127
134
128
135
return torch .cat (concat_seq , dim = - 1 )
129
136
0 commit comments