 from sklearn.metrics import mean_absolute_error


-def absolute_loss(y_true, y_pred):
+def absolute_loss(y_true, y_pred, sample_weight=None):
     """
     Computes the absolute loss for regression.

     :param y_true: array-like or label indicator matrix
         Ground truth (correct) values.
     :param y_pred: array-like or label indicator matrix
         Predicted values, as returned by a regression estimator.
+    :param sample_weight: array-like of shape (n_samples,), default=None
+        Sample weights.
     :return: loss, float
         The degree to which the samples are correctly predicted.
     """
-    return np.sum(np.abs(y_true - y_pred)) / y_true.shape[0]
+    if sample_weight is None:
+        return np.sum(np.abs(y_true - y_pred)) / y_true.shape[0]
+    # Weighted mean over samples (per output), summed over outputs so the
+    # result keeps the same scale as the unweighted branch above.
+    return np.sum(
+        np.average(np.abs(y_true - y_pred), weights=sample_weight, axis=0)
+    )


 def float_sign(a):
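A quick sanity check of the new signature (not part of the commit; a minimal sketch that assumes numpy and the absolute_loss defined above are available in the current scope):

import numpy as np

y_true = np.array([1.0, 2.0, 3.0])
y_pred = np.array([1.5, 2.0, 1.0])

# Unweighted: plain mean absolute error, (0.5 + 0.0 + 2.0) / 3.
print(absolute_loss(y_true, y_pred))                            # 0.8333...

# Uniform weights reproduce the unweighted value.
print(absolute_loss(y_true, y_pred, sample_weight=np.ones(3)))  # 0.8333...

# A heavier weight on the worst sample pulls the loss up: (0.5 + 0.0 + 8.0) / 6.
print(absolute_loss(y_true, y_pred, sample_weight=np.array([1.0, 1.0, 4.0])))  # 1.4166...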
@@ -132,7 +138,7 @@ def _modify_loss_derivatives(self, last_deltas):
             return DERIVATIVE_LOSS_FUNCTIONS["absolute_loss"](last_deltas)
         return last_deltas

-    def _backprop(self, X, y, activations, deltas, coef_grads, intercept_grads):
+    def _backprop(self, *args):
         """
         Computes the MLP loss function and its corresponding derivatives
         with respect to each parameter: weights and bias vectors.
@@ -141,6 +147,8 @@ def _backprop(self, X, y, activations, deltas, coef_grads, intercept_grads):
             The input data.
         :param y: array-like, shape (n_samples,)
             The target values.
+        :param sample_weight: array-like of shape (n_samples,), default=None
+            Sample weights.
         :param activations: list, length = n_layers - 1
             The ith element of the list holds the values of the ith layer.
         :param deltas: list, length = n_layers - 1
@@ -155,10 +163,18 @@ def _backprop(self, X, y, activations, deltas, coef_grads, intercept_grads):
         :param intercept_grads: list, length = n_layers - 1
             The ith element contains the amount of change used to update the
             intercept parameters of the ith layer in an iteration.
-        :return: loss, float
-        :return: coef_grads, list, length = n_layers - 1
-        :return: intercept_grads, list, length = n_layers - 1
+        :return: loss (float),
+            coef_grads (list, length = n_layers - 1),
+            intercept_grads (list, length = n_layers - 1)
+
+
         """
+        if len(args) == 6:
+            X, y, activations, deltas, coef_grads, intercept_grads = args
+            sample_weight = None  # caller did not forward sample weights
+        else:
+            X, y, sample_weight, activations, deltas, coef_grads, intercept_grads = args
+
         n_samples = X.shape[0]

         # Forward propagate
@@ -169,10 +185,12 @@ def _backprop(self, X, y, activations, deltas, coef_grads, intercept_grads):
         if loss_func_name == "log_loss" and self.out_activation_ == "logistic":
             loss_func_name = "binary_log_loss"
         loss_function = self._get_loss_function(loss_func_name)
-        loss = loss_function(y, activations[-1])
+        loss = loss_function(y, activations[-1], sample_weight)
         # Add L2 regularization term to loss
         values = np.sum(np.array([np.dot(s.ravel(), s.ravel()) for s in self.coefs_]))
-        loss += (0.5 * self.alpha) * values / n_samples
+
+        sw_sum = n_samples if sample_weight is None else sample_weight.sum()
+        loss += (0.5 * self.alpha) * values / sw_sum

         # Backward propagate
         last = self.n_layers_ - 2
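In effect, the hunk above normalises both the data term and the L2 penalty by the total weight instead of the sample count. A standalone sketch of that objective, using hypothetical values and numpy only (none of these names come from the diff):

import numpy as np

alpha = 1e-4                                   # L2 strength, as in MLPRegressor
abs_err = np.array([0.5, 0.0, 2.0])            # |y - y_pred| for three samples
sample_weight = np.array([1.0, 1.0, 4.0])
coefs = [np.array([[0.1, -0.2], [0.3, 0.0]])]  # stand-in for self.coefs_

sw_sum = sample_weight.sum()                   # equals n_samples when weights are None
data_loss = np.sum(sample_weight * abs_err) / sw_sum
l2_term = 0.5 * alpha * sum(np.dot(c.ravel(), c.ravel()) for c in coefs) / sw_sum
loss = data_loss + l2_term

Passing the same sw_sum to _compute_loss_grad in the hunks below keeps the gradients on the same scale as this loss.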
@@ -182,20 +200,22 @@ def _backprop(self, X, y, activations, deltas, coef_grads, intercept_grads):
         # sigmoid and binary cross entropy, softmax and categorical cross
         # entropy, and identity with squared loss
         deltas[last] = activations[-1] - y
+        if sample_weight is not None:
+            deltas[last] *= sample_weight.reshape(-1, 1)

         # We insert the following modification to modify the gradient
         # due to the modification of the loss function.
         deltas[last] = self._modify_loss_derivatives(deltas[last])

         # Compute gradient for the last layer
         temp = self._compute_loss_grad(
-            last, n_samples, activations, deltas, coef_grads, intercept_grads
+            last, sw_sum, activations, deltas, coef_grads, intercept_grads
         )
         if temp is None:
             # recent version of scikit-learn
             # Compute gradient for the last layer
             self._compute_loss_grad(
-                last, n_samples, activations, deltas, coef_grads, intercept_grads
+                last, sw_sum, activations, deltas, coef_grads, intercept_grads
             )

             inplace_derivative = DERIVATIVES[self.activation]
@@ -205,7 +225,7 @@ def _backprop(self, X, y, activations, deltas, coef_grads, intercept_grads):
                 inplace_derivative(activations[i], deltas[i - 1])

                 self._compute_loss_grad(
-                    i - 1, n_samples, activations, deltas, coef_grads, intercept_grads
+                    i - 1, sw_sum, activations, deltas, coef_grads, intercept_grads
                 )
         else:
             coef_grads, intercept_grads = temp
@@ -220,7 +240,7 @@ def _backprop(self, X, y, activations, deltas, coef_grads, intercept_grads):
                     coef_grads,
                     intercept_grads,
                 ) = self._compute_loss_grad(
-                    i - 1, n_samples, activations, deltas, coef_grads, intercept_grads
+                    i - 1, sw_sum, activations, deltas, coef_grads, intercept_grads
                 )

         return loss, coef_grads, intercept_grads
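One way to convince yourself the weighting is consistent end to end (again outside the commit; assumes only numpy and the absolute_loss above): weighting a sample by 2 should be equivalent to duplicating it with unit weight, and the same sw_sum normalisation in _backprop preserves that equivalence for the gradients.

import numpy as np

y_true = np.array([1.0, 2.0, 3.0])
y_pred = np.array([1.5, 2.0, 1.0])

# Weighting the last sample by 2 ...
weighted = absolute_loss(y_true, y_pred, sample_weight=np.array([1.0, 1.0, 2.0]))

# ... should match duplicating that sample with unit weights.
duplicated = absolute_loss(
    np.append(y_true, 3.0), np.append(y_pred, 1.0), sample_weight=np.ones(4)
)
assert np.isclose(weighted, duplicated)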