Skip to content

Commit dd56e81

Browse files
committed
Merge branch '2.5' into 3.0
2 parents 57e1811 + 0fa98ce commit dd56e81

File tree

108 files changed

+659
-181
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

108 files changed

+659
-181
lines changed

.github/workflows/ci.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ jobs:
2020
with:
2121
php-version: ${{ matrix.php-versions }}
2222
tools: composer, pecl
23-
extensions: svm, mbstring, gd, fileinfo
23+
extensions: svm, mbstring, gd, fileinfo, swoole
2424
ini-values: memory_limit=-1
2525

2626
- name: Validate composer.json

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ Thumbs.db
88
.DS_Store
99
debug.log
1010
/test.png
11+
pyvenv.cfg
1112
.venv
1213
.idea
1314
.vscode

CHANGELOG.md

+3
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@
1212
- 2.5.0
1313
- Added Vantage Point Spatial tree
1414
- Blob Generator can now `simulate()` a Dataset object
15+
- Added Wrapper interface
16+
- Added Swoole Backend
17+
- Plus Plus added check for min number of sample seeds
1518

1619
- 2.4.1
1720
- Sentence Tokenizer fix Arabic and Farsi language support

benchmarks/Classifiers/OneVsRestBench.php

+9-1
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,22 @@
22

33
namespace Rubix\ML\Benchmarks\Classifiers;
44

5+
use Rubix\ML\Backends\Backend;
56
use Rubix\ML\Classifiers\OneVsRest;
67
use Rubix\ML\Datasets\Generators\Blob;
78
use Rubix\ML\Classifiers\LogisticRegression;
89
use Rubix\ML\NeuralNet\Optimizers\Stochastic;
910
use Rubix\ML\Datasets\Generators\Agglomerate;
11+
use Rubix\ML\Tests\DataProvider\BackendProviderTrait;
1012

1113
/**
1214
* @Groups({"Classifiers"})
1315
* @BeforeMethods({"setUp"})
1416
*/
1517
class OneVsRestBench
1618
{
19+
use BackendProviderTrait;
20+
1721
protected const TRAINING_SIZE = 10000;
1822

1923
protected const TESTING_SIZE = 10000;
@@ -52,9 +56,13 @@ public function setUp() : void
5256
* @Subject
5357
* @Iterations(5)
5458
* @OutputTimeUnit("seconds", precision=3)
59+
* @ParamProviders("provideBackends")
60+
* @param array{ backend: Backend } $params
5561
*/
56-
public function trainPredict() : void
62+
public function trainPredict(array $params) : void
5763
{
64+
$this->estimator->setBackend($params['backend']);
65+
5866
$this->estimator->train($this->training);
5967

6068
$this->estimator->predict($this->testing);

benchmarks/Classifiers/RandomForestBench.php

+14-2
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,21 @@
22

33
namespace Rubix\ML\Benchmarks\Classifiers;
44

5+
use Rubix\ML\Backends\Backend;
56
use Rubix\ML\Classifiers\RandomForest;
67
use Rubix\ML\Datasets\Generators\Blob;
78
use Rubix\ML\Classifiers\ClassificationTree;
89
use Rubix\ML\Datasets\Generators\Agglomerate;
10+
use Rubix\ML\Tests\DataProvider\BackendProviderTrait;
911
use Rubix\ML\Transformers\IntervalDiscretizer;
1012

1113
/**
1214
* @Groups({"Classifiers"})
1315
*/
1416
class RandomForestBench
1517
{
18+
use BackendProviderTrait;
19+
1620
protected const TRAINING_SIZE = 10000;
1721

1822
protected const TESTING_SIZE = 10000;
@@ -70,9 +74,13 @@ public function setUpCategorical() : void
7074
* @Iterations(5)
7175
* @BeforeMethods({"setUpContinuous"})
7276
* @OutputTimeUnit("seconds", precision=3)
77+
* @ParamProviders("provideBackends")
78+
* @param array{ backend: Backend } $params
7379
*/
74-
public function continuous() : void
80+
public function continuous(array $params) : void
7581
{
82+
$this->estimator->setBackend($params['backend']);
83+
7684
$this->estimator->train($this->training);
7785

7886
$this->estimator->predict($this->testing);
@@ -83,9 +91,13 @@ public function continuous() : void
8391
* @Iterations(5)
8492
* @BeforeMethods({"setUpCategorical"})
8593
* @OutputTimeUnit("seconds", precision=3)
94+
* @ParamProviders("provideBackends")
95+
* @param array{ backend: Backend } $params
8696
*/
87-
public function categorical() : void
97+
public function categorical(array $params) : void
8898
{
99+
$this->estimator->setBackend($params['backend']);
100+
89101
$this->estimator->train($this->training);
90102

91103
$this->estimator->predict($this->testing);

composer.json

+2-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,8 @@
4949
"phpstan/extension-installer": "^1.0",
5050
"phpstan/phpstan": "^1.0",
5151
"phpstan/phpstan-phpunit": "^1.0",
52-
"phpunit/phpunit": "^9.0"
52+
"phpunit/phpunit": "^9.0",
53+
"swoole/ide-helper": "^5.1"
5354
},
5455
"suggest": {
5556
"ext-tensor": "For fast Matrix/Vector computing",

docs/cross-validation/metrics/rand-index.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ $$
77
{\displaystyle ARI = {\frac {\left.\sum _{ij}{\binom {n_{ij}}{2}}-\left[\sum _{i}{\binom {a_{i}}{2}}\sum _{j}{\binom {b_{j}}{2}}\right]\right/{\binom {n}{2}}}{\left.{\frac {1}{2}}\left[\sum _{i}{\binom {a_{i}}{2}}+\sum _{j}{\binom {b_{j}}{2}}\right]-\left[\sum _{i}{\binom {a_{i}}{2}}\sum _{j}{\binom {b_{j}}{2}}\right]\right/{\binom {n}{2}}}}}
88
$$
99

10-
**Estimator Compatibility:** Regressor
10+
**Estimator Compatibility:** Clusterer
1111

1212
**Score Range:** -1 to 1
1313

phpstan.neon

+1
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@ parameters:
66
- 'benchmarks'
77
excludePaths:
88
- src/Backends/Amp.php
9+
- src/Backends/Swoole.php

phpunit.xml

+14-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,18 @@
11
<?xml version="1.0" encoding="UTF-8"?>
2-
<phpunit xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" backupGlobals="false" backupStaticAttributes="false" bootstrap="vendor/autoload.php" colors="true" convertErrorsToExceptions="true" convertNoticesToExceptions="true" convertWarningsToExceptions="true" forceCoversAnnotation="true" processIsolation="false" stopOnFailure="false" xsi:noNamespaceSchemaLocation="https://schema.phpunit.de/9.3/phpunit.xsd">
2+
<phpunit
3+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4+
backupGlobals="false"
5+
backupStaticAttributes="false"
6+
bootstrap="vendor/autoload.php"
7+
colors="true"
8+
convertErrorsToExceptions="true"
9+
convertNoticesToExceptions="true"
10+
convertWarningsToExceptions="true"
11+
forceCoversAnnotation="true"
12+
processIsolation="true"
13+
stopOnFailure="false"
14+
xsi:noNamespaceSchemaLocation="https://schema.phpunit.de/9.3/phpunit.xsd"
15+
>
316
<coverage processUncoveredFiles="true">
417
<include>
518
<directory suffix=".php">src</directory>

src/AnomalyDetectors/LocalOutlierFactor.php

+1-1
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ class LocalOutlierFactor implements Estimator, Learner, Scoring, Persistable
6767
*
6868
* @var Spatial
6969
*/
70-
protected \Rubix\ML\Graph\Trees\Spatial $tree;
70+
protected Spatial $tree;
7171

7272
/**
7373
* The precomputed k distances between each training sample and its k'th nearest neighbor.

src/AnomalyDetectors/Loda.php

+1-1
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ class Loda implements Estimator, Learner, Online, Scoring, Persistable
100100
*
101101
* @var \Tensor\Matrix|null
102102
*/
103-
protected ?\Tensor\Matrix $r = null;
103+
protected ?Matrix $r = null;
104104

105105
/**
106106
* The edges and bin counts of each histogram.

src/AnomalyDetectors/OneClassSVM.php

+2-2
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ class OneClassSVM implements Estimator, Learner
4444
*
4545
* @var svm
4646
*/
47-
protected \svm $svm;
47+
protected svm $svm;
4848

4949
/**
5050
* The hyper-parameters of the model.
@@ -58,7 +58,7 @@ class OneClassSVM implements Estimator, Learner
5858
*
5959
* @var \svmmodel|null
6060
*/
61-
protected ?\svmmodel $model = null;
61+
protected ?svmmodel $model = null;
6262

6363
/**
6464
* @param float $nu

src/Backends/Swoole.php

+173
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
<?php
2+
3+
namespace Rubix\ML\Backends;
4+
5+
use Rubix\ML\Backends\Tasks\Task;
6+
use Rubix\ML\Specifications\ExtensionIsLoaded;
7+
use Rubix\ML\Specifications\SwooleExtensionIsLoaded;
8+
use RuntimeException;
9+
use Swoole\Atomic;
10+
use Swoole\Process;
11+
12+
use function Swoole\Coroutine\run;
13+
14+
/**
 * Swoole
 *
 * A parallel backend that runs every queued task in its own Swoole worker process and
 * collects the serialized results over a datagram socket. Works both with Swoole and
 * OpenSwoole.
 *
 * @category    Machine Learning
 * @package     Rubix/ML
 */
class Swoole implements Backend
{
    /**
     * The queue of tasks to be processed in parallel.
     *
     * @var list<callable():mixed>
     */
    protected array $queue = [];

    /**
     * The number of CPUs reported by Swoole, used to assign workers to cores round-robin.
     */
    private int $cpus;

    /**
     * Whether the igbinary extension is loaded and should be used for (un)serialization.
     */
    private bool $hasIgbinary;

    /**
     * @throws \Rubix\ML\Exceptions\MissingExtension presumably, when Swoole is not loaded (thrown by the specification - confirm)
     */
    public function __construct()
    {
        SwooleExtensionIsLoaded::create()->check();

        $this->cpus = swoole_cpu_num();
        $this->hasIgbinary = ExtensionIsLoaded::with('igbinary')->passes();
    }

    /**
     * Queue up a deferred task for backend processing.
     *
     * @internal
     *
     * @param Task $task
     * @param (callable(mixed,mixed):void)|null $after
     * @param mixed $context
     */
    public function enqueue(Task $task, ?callable $after = null, $context = null) : void
    {
        $this->queue[] = function () use ($task, $after, $context) {
            $result = $task();

            if ($after) {
                $after($result, $context);
            }

            return $result;
        };
    }

    /**
     * Process the queue and return the results.
     *
     * Each queued task is run in a dedicated worker process. The worker serializes its
     * result and sends it back as a single datagram, which the parent unserializes.
     * Results are returned in the order the tasks were enqueued.
     *
     * @internal
     *
     * @throws RuntimeException If a worker exits abnormally or its result cannot be received.
     * @return mixed[]
     */
    public function process() : array
    {
        $results = [];

        // Shared between processes: the size of the largest serialized result, which
        // the parent uses as the receive buffer length.
        $maxMessageLength = new Atomic(0);
        $workerProcesses = [];

        $currentCpu = 0;

        foreach ($this->queue as $index => $queueItem) {
            $workerProcess = new Process(
                function (Process $worker) use ($maxMessageLength, $queueItem) {
                    $serialized = $this->serialize($queueItem());

                    $serializedLength = strlen($serialized);

                    // Raise the shared maximum with a compare-and-set loop. A plain
                    // get-then-set lets a worker with a smaller payload overwrite a
                    // larger value written concurrently by another worker.
                    do {
                        $currentMaxSerializedLength = $maxMessageLength->get();
                    } while (
                        $serializedLength > $currentMaxSerializedLength
                        && !$maxMessageLength->cmpset($currentMaxSerializedLength, $serializedLength)
                    );

                    $worker->exportSocket()->send($serialized);
                },
                // redirect_stdin_and_stdout
                false,
                // pipe_type
                SOCK_DGRAM,
                // enable_coroutine
                true,
            );

            $workerProcess->setAffinity([$currentCpu]);
            $workerProcess->setBlocking(false);
            $workerProcess->start();

            $workerProcesses[$index] = $workerProcess;

            // Distribute workers over the available cores round-robin.
            $currentCpu = ($currentCpu + 1) % $this->cpus;
        }

        run(function () use ($maxMessageLength, &$results, $workerProcesses) {
            // Reap every worker before reading anything so that the shared maximum
            // message length is final. Reading it while workers are still running can
            // yield a buffer that is too small, truncating a datagram.
            // NOTE(review): Process::wait() appears to reap any exited child rather
            // than this specific worker - one call per worker reaps them all either way.
            foreach ($workerProcesses as $workerProcess) {
                $status = $workerProcess->wait();

                if (false === $status || 0 !== $status['code']) {
                    throw new RuntimeException('Worker process exited with an error');
                }
            }

            $maxMessageLengthValue = $maxMessageLength->get();

            foreach ($workerProcesses as $workerProcess) {
                $socket = $workerProcess->exportSocket();

                if ($socket->isClosed()) {
                    throw new RuntimeException('Coroutine socket is closed');
                }

                $receivedData = $socket->recv($maxMessageLengthValue);

                // A serialized payload is never empty, so anything but a non-empty
                // string means the receive failed.
                if (!is_string($receivedData) || '' === $receivedData) {
                    throw new RuntimeException('Failed to receive result from worker process');
                }

                $results[] = $this->unserialize($receivedData);
            }
        });

        return $results;
    }

    /**
     * Flush the queue
     */
    public function flush() : void
    {
        $this->queue = [];
    }

    /**
     * Serialize a task result, using igbinary when it is available.
     *
     * @param mixed $data
     * @return string
     */
    private function serialize(mixed $data) : string
    {
        if ($this->hasIgbinary) {
            return igbinary_serialize($data);
        }

        return serialize($data);
    }

    /**
     * Restore a task result produced by serialize() in a worker process.
     *
     * @param string $serialized
     * @return mixed
     */
    private function unserialize(string $serialized) : mixed
    {
        if ($this->hasIgbinary) {
            return igbinary_unserialize($serialized);
        }

        return unserialize($serialized);
    }

    /**
     * Return the string representation of the object.
     *
     * @internal
     *
     * @return string
     */
    public function __toString() : string
    {
        return 'Swoole';
    }
}

src/BootstrapAggregator.php

+1-1
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ class BootstrapAggregator implements Estimator, Learner, Parallel, Persistable
6464
*
6565
* @var Learner
6666
*/
67-
protected \Rubix\ML\Learner $base;
67+
protected Learner $base;
6868

6969
/**
7070
* The number of base learners to train in the ensemble.

src/Classifiers/AdaBoost.php

+1-1
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ class AdaBoost implements Estimator, Learner, Probabilistic, Verbose, Persistabl
7272
*
7373
* @var Learner
7474
*/
75-
protected \Rubix\ML\Learner $base;
75+
protected Learner $base;
7676

7777
/**
7878
* The learning rate of the ensemble i.e. the *shrinkage* applied to each step.

src/Classifiers/KDNeighbors.php

+1-1
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ class KDNeighbors implements Estimator, Learner, Probabilistic, Persistable
6060
*
6161
* @var Spatial
6262
*/
63-
protected \Rubix\ML\Graph\Trees\Spatial $tree;
63+
protected Spatial $tree;
6464

6565
/**
6666
* The zero vector for the possible class outcomes.

0 commit comments

Comments
 (0)