Skip to content

Commit 1f3bdec

Browse files
committed
Merge branch 'master' of github.com:/RubixML/ML
2 parents ef104f3 + a77103a commit 1f3bdec

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

42 files changed

+58
-53
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
- 2.4.1
22
- Fix Arabic and Farsi language support in Sentence Tokenizer
3+
- Optimize online variance updating
34

45
- 2.4.0
56
- Add GELU activation function

benchmarks/AnomalyDetectors/GaussianMLEBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
*/
1313
class GaussianMLEBench
1414
{
15-
protected const TRAINING_SIZE = 2500;
15+
protected const TRAINING_SIZE = 10000;
1616

1717
protected const TESTING_SIZE = 10000;
1818

benchmarks/AnomalyDetectors/IsolationForestBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
*/
1313
class IsolationForestBench
1414
{
15-
protected const TRAINING_SIZE = 2500;
15+
protected const TRAINING_SIZE = 10000;
1616

1717
protected const TESTING_SIZE = 10000;
1818

benchmarks/AnomalyDetectors/LocalOutlierFactorBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
*/
1313
class LocalOutlierFactorBench
1414
{
15-
protected const TRAINING_SIZE = 2500;
15+
protected const TRAINING_SIZE = 10000;
1616

1717
protected const TESTING_SIZE = 10000;
1818

benchmarks/AnomalyDetectors/LodaBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
*/
1313
class LodaBench
1414
{
15-
protected const TRAINING_SIZE = 2500;
15+
protected const TRAINING_SIZE = 10000;
1616

1717
protected const TESTING_SIZE = 10000;
1818

benchmarks/AnomalyDetectors/OneClassSVMBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
*/
1313
class OneClassSVMBench
1414
{
15-
protected const TRAINING_SIZE = 2500;
15+
protected const TRAINING_SIZE = 10000;
1616

1717
protected const TESTING_SIZE = 10000;
1818

benchmarks/AnomalyDetectors/RobustZScoreBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
*/
1313
class RobustZScoreBench
1414
{
15-
protected const TRAINING_SIZE = 2500;
15+
protected const TRAINING_SIZE = 10000;
1616

1717
protected const TESTING_SIZE = 10000;
1818

benchmarks/Classifiers/AdaBoostBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
*/
1414
class AdaBoostBench
1515
{
16-
protected const TRAINING_SIZE = 2500;
16+
protected const TRAINING_SIZE = 10000;
1717

1818
protected const TESTING_SIZE = 10000;
1919

benchmarks/Classifiers/ClassificationTreeBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
*/
1313
class ClassificationTreeBench
1414
{
15-
protected const TRAINING_SIZE = 2500;
15+
protected const TRAINING_SIZE = 10000;
1616

1717
protected const TESTING_SIZE = 10000;
1818

benchmarks/Classifiers/ExtraTreeClassifierBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
*/
1313
class ExtraTreeClassifierBench
1414
{
15-
protected const TRAINING_SIZE = 2500;
15+
protected const TRAINING_SIZE = 10000;
1616

1717
protected const TESTING_SIZE = 10000;
1818

benchmarks/Classifiers/GaussianNBBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
*/
1313
class GaussianNBBench
1414
{
15-
protected const TRAINING_SIZE = 2500;
15+
protected const TRAINING_SIZE = 10000;
1616

1717
protected const TESTING_SIZE = 10000;
1818

benchmarks/Classifiers/KDNeighborsBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
*/
1313
class KDNeighborsBench
1414
{
15-
protected const TRAINING_SIZE = 2500;
15+
protected const TRAINING_SIZE = 10000;
1616

1717
protected const TESTING_SIZE = 10000;
1818

benchmarks/Classifiers/KNearestNeighborsBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
*/
1313
class KNearestNeighborsBench
1414
{
15-
protected const TRAINING_SIZE = 2500;
15+
protected const TRAINING_SIZE = 10000;
1616

1717
protected const TESTING_SIZE = 10000;
1818

benchmarks/Classifiers/LogisticRegressionBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
*/
1313
class LogisticRegressionBench
1414
{
15-
protected const TRAINING_SIZE = 2500;
15+
protected const TRAINING_SIZE = 10000;
1616

1717
protected const TESTING_SIZE = 10000;
1818

benchmarks/Classifiers/LogitBoostBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
*/
1313
class LogitBoostBench
1414
{
15-
protected const TRAINING_SIZE = 2500;
15+
protected const TRAINING_SIZE = 10000;
1616

1717
protected const TESTING_SIZE = 10000;
1818

benchmarks/Classifiers/MultilayerPerceptronBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
*/
1616
class MultilayerPerceptronBench
1717
{
18-
protected const TRAINING_SIZE = 2500;
18+
protected const TRAINING_SIZE = 10000;
1919

2020
protected const TESTING_SIZE = 10000;
2121

benchmarks/Classifiers/NaiveBayesBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
*/
1414
class NaiveBayesBench
1515
{
16-
protected const TRAINING_SIZE = 2500;
16+
protected const TRAINING_SIZE = 10000;
1717

1818
protected const TESTING_SIZE = 10000;
1919

benchmarks/Classifiers/OneVsRestBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
*/
1515
class OneVsRestBench
1616
{
17-
protected const TRAINING_SIZE = 2500;
17+
protected const TRAINING_SIZE = 10000;
1818

1919
protected const TESTING_SIZE = 10000;
2020

benchmarks/Classifiers/RadiusNeighborsBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
*/
1313
class RadiusNeighborsBench
1414
{
15-
protected const TRAINING_SIZE = 2500;
15+
protected const TRAINING_SIZE = 10000;
1616

1717
protected const TESTING_SIZE = 10000;
1818

benchmarks/Classifiers/RandomForestBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
*/
1414
class RandomForestBench
1515
{
16-
protected const TRAINING_SIZE = 2500;
16+
protected const TRAINING_SIZE = 10000;
1717

1818
protected const TESTING_SIZE = 10000;
1919

benchmarks/Classifiers/SVCBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
*/
1414
class SVCBench
1515
{
16-
protected const TRAINING_SIZE = 2500;
16+
protected const TRAINING_SIZE = 10000;
1717

1818
protected const TESTING_SIZE = 10000;
1919

benchmarks/Classifiers/SoftmaxClassifierBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
*/
1313
class SoftmaxClassifierBench
1414
{
15-
protected const TRAINING_SIZE = 2500;
15+
protected const TRAINING_SIZE = 10000;
1616

1717
protected const TESTING_SIZE = 10000;
1818

benchmarks/Clusterers/FuzzyCMeansBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
*/
1313
class FuzzyCMeansBench
1414
{
15-
protected const TRAINING_SIZE = 2500;
15+
protected const TRAINING_SIZE = 10000;
1616

1717
protected const TESTING_SIZE = 10000;
1818

benchmarks/Clusterers/GaussianMixtureBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
*/
1313
class GaussianMixtureBench
1414
{
15-
protected const TRAINING_SIZE = 2500;
15+
protected const TRAINING_SIZE = 10000;
1616

1717
protected const TESTING_SIZE = 10000;
1818

benchmarks/Clusterers/KMeansBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
*/
1313
class KMeansBench
1414
{
15-
protected const TRAINING_SIZE = 2500;
15+
protected const TRAINING_SIZE = 10000;
1616

1717
protected const TESTING_SIZE = 10000;
1818

benchmarks/Clusterers/MeanShiftBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
*/
1313
class MeanShiftBench
1414
{
15-
protected const TRAINING_SIZE = 2500;
15+
protected const TRAINING_SIZE = 10000;
1616

1717
protected const TESTING_SIZE = 10000;
1818

benchmarks/Persisters/Serializers/GzipNativeBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
*/
1414
class GzipNativeBench
1515
{
16-
protected const TRAINING_SIZE = 2500;
16+
protected const TRAINING_SIZE = 10000;
1717

1818
/**
1919
* @var \Rubix\ML\Serializers\GzipNative

benchmarks/Persisters/Serializers/NativeBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
*/
1414
class NativeBench
1515
{
16-
protected const TRAINING_SIZE = 2500;
16+
protected const TRAINING_SIZE = 10000;
1717

1818
/**
1919
* @var \Rubix\ML\Serializers\Native

benchmarks/Persisters/Serializers/RBXBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
*/
1414
class RBXBench
1515
{
16-
protected const TRAINING_SIZE = 2500;
16+
protected const TRAINING_SIZE = 10000;
1717

1818
/**
1919
* @var \Rubix\ML\Serializers\RBX

benchmarks/Regressors/AdalineBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
*/
1212
class AdalineBench
1313
{
14-
protected const TRAINING_SIZE = 2500;
14+
protected const TRAINING_SIZE = 10000;
1515

1616
protected const TESTING_SIZE = 10000;
1717

benchmarks/Regressors/ExtraTreeRegressorBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
*/
1212
class ExtraTreeRegressorBench
1313
{
14-
protected const TRAINING_SIZE = 2500;
14+
protected const TRAINING_SIZE = 10000;
1515

1616
protected const TESTING_SIZE = 10000;
1717

benchmarks/Regressors/GradientBoostBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
*/
1212
class GradientBoostBench
1313
{
14-
protected const TRAINING_SIZE = 2500;
14+
protected const TRAINING_SIZE = 10000;
1515

1616
protected const TESTING_SIZE = 10000;
1717

benchmarks/Regressors/KDNeighborsRegressorBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
*/
1212
class KDNeighborsRegressorBench
1313
{
14-
protected const TRAINING_SIZE = 2500;
14+
protected const TRAINING_SIZE = 10000;
1515

1616
protected const TESTING_SIZE = 10000;
1717

benchmarks/Regressors/KNNRegressorBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
*/
1212
class KNNRegressorBench
1313
{
14-
protected const TRAINING_SIZE = 2500;
14+
protected const TRAINING_SIZE = 10000;
1515

1616
protected const TESTING_SIZE = 10000;
1717

benchmarks/Regressors/MLPRegressorBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
*/
1515
class MLPRegressorBench
1616
{
17-
protected const TRAINING_SIZE = 2500;
17+
protected const TRAINING_SIZE = 10000;
1818

1919
protected const TESTING_SIZE = 10000;
2020

benchmarks/Regressors/RadiusNeighborsRegressorBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
*/
1212
class RadiusNeighborsRegressorBench
1313
{
14-
protected const TRAINING_SIZE = 2500;
14+
protected const TRAINING_SIZE = 10000;
1515

1616
protected const TESTING_SIZE = 10000;
1717

benchmarks/Regressors/RegressionTreeBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
*/
1212
class RegressionTreeBench
1313
{
14-
protected const TRAINING_SIZE = 2500;
14+
protected const TRAINING_SIZE = 10000;
1515

1616
protected const TESTING_SIZE = 10000;
1717

benchmarks/Regressors/RidgeBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
*/
1212
class RidgeBench
1313
{
14-
protected const TRAINING_SIZE = 2500;
14+
protected const TRAINING_SIZE = 10000;
1515

1616
protected const TESTING_SIZE = 10000;
1717

benchmarks/Regressors/SVRBench.php

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
*/
1212
class SVRBench
1313
{
14-
protected const TRAINING_SIZE = 2500;
14+
protected const TRAINING_SIZE = 10000;
1515

1616
protected const TESTING_SIZE = 10000;
1717

src/AnomalyDetectors/GaussianMLE.php

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,8 @@ public function partial(Dataset $dataset) : void
244244

245245
$n = $dataset->numSamples();
246246

247+
$weight = $this->n + $n;
248+
247249
foreach ($dataset->features() as $column => $values) {
248250
[$mean, $variance] = Stats::meanVar($values);
249251

@@ -253,13 +255,13 @@ public function partial(Dataset $dataset) : void
253255
$oldVariance -= $this->epsilon;
254256

255257
$this->means[$column] = (($this->n * $oldMean)
256-
+ ($n * $mean)) / ($this->n + $n);
258+
+ ($n * $mean)) / $weight;
257259

258260
$this->variances[$column] = ($this->n
259261
* $oldVariance + ($n * $variance)
260-
+ ($this->n / ($n * ($this->n + $n)))
262+
+ ($this->n / ($n * $weight))
261263
* ($n * $oldMean - $n * $mean) ** 2)
262-
/ ($this->n + $n);
264+
/ $weight;
263265
}
264266

265267
$epsilon = max($this->smoothing * max($this->variances), CPU::epsilon());
@@ -270,15 +272,15 @@ public function partial(Dataset $dataset) : void
270272

271273
$this->epsilon = $epsilon;
272274

273-
$this->n += $n;
275+
$this->n = $weight;
274276

275277
$lls = array_map([$this, 'logLikelihood'], $dataset->samples());
276278

277279
$threshold = Stats::quantile($lls, 1.0 - $this->contamination);
278280

279-
$weight = $n / $this->n;
281+
$proportion = $n / $this->n;
280282

281-
$this->threshold = (1.0 - $weight) * $this->threshold + $weight * $threshold;
283+
$this->threshold = $proportion * $threshold + (1.0 - $proportion) * $this->threshold;
282284
}
283285

284286
/**

0 commit comments

Comments
 (0)