-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathSoftMaxRegression_Datumbox.groovy
81 lines (72 loc) · 3.51 KB
/
SoftMaxRegression_Datumbox.groovy
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
/*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import com.datumbox.framework.common.Configuration
import com.datumbox.framework.core.common.dataobjects.Dataframe
import com.datumbox.framework.core.machinelearning.MLBuilder
import com.datumbox.framework.core.machinelearning.classification.SoftMaxRegression
import com.datumbox.framework.core.machinelearning.modelselection.metrics.ClassificationMetrics
import com.datumbox.framework.core.machinelearning.modelselection.splitters.ShuffleSplitter
import org.knowm.xchart.SwingWrapper
import org.knowm.xchart.XYChartBuilder
import static com.datumbox.framework.common.dataobjects.TypeInference.DataType.CATEGORICAL
import static com.datumbox.framework.common.dataobjects.TypeInference.DataType.NUMERICAL
import static org.knowm.xchart.XYSeries.XYSeriesRenderStyle.Scatter
// Locate the Iris CSV on the classpath. The File is built from the resource's URI rather than
// from URL.getFile(): getFile() returns the percent-encoded path, so a location containing
// spaces or non-ASCII characters would otherwise not resolve to a real file.
def file = new File(getClass().classLoader.getResource('iris_data.csv').toURI())
def cols = ['Sepal length', 'Sepal width', 'Petal length', 'Petal width']
// RandomGenerator.globalSeed = -1L // for repeatable results
def config = Configuration.configuration
// Column name -> data type: every measurement column is NUMERICAL, 'Class' is CATEGORICAL.
def headers = [*: cols.collectEntries { [it, NUMERICAL] }, Class: CATEGORICAL]
Dataframe data = null
// Separator arguments spread into the parseCSVFile call below (',' then '"' then "\r\n").
def defaultSeps = [',' as char, '"' as char, "\r\n"]
file.withReader {
    // 'Class' is passed as the response column; the two arguments after the separators are left null.
    data = Dataframe.Builder.parseCSVFile(it, 'Class', headers, *defaultSeps, null, null, config)
}
// Take the first split produced by a ShuffleSplitter constructed with (0.8, 1)
// and keep its train and test portions.
def splitter = new ShuffleSplitter(0.8, 1)
def firstSplit = splitter.split(data).next()
Dataframe training = firstSplit.train
Dataframe testing = firstSplit.test

// Set up the training parameters, then build the classifier from them.
def classifierParams = new SoftMaxRegression.TrainingParameters()
classifierParams.totalIterations = 200
classifierParams.learningRate = 0.1
SoftMaxRegression classifier = MLBuilder.create(classifierParams, config)

// Fit on the training portion, save the model under the name "Class",
// then run prediction over the test portion (results are read from `testing` afterwards).
classifier.fit(training)
classifier.save("Class")
classifier.predict(testing)
println "Results:"
// Petal measurements grouped by chart series name; a missing series starts as an empty list.
def petalL = [:].withDefault { [] }
def petalW = [:].withDefault { [] }
// Classes whose predicted probabilities are printed, in display order.
def speciesLabels = ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']
testing.entries().each { entry ->
    def key = entry.key
    def record = entry.value
    def actual = record.y
    def predicted = record.YPredicted
    def correct = actual == predicted
    def probs = record.YPredictedProbabilities
    // Marks the predicted class: '*' when the prediction is right, '**' when it is wrong.
    def prefix = { klass -> klass == predicted ? (correct ? '*' : '**') : '' }
    // Correct predictions are grouped under the actual class, misses under "predicted/actual".
    // toString() keeps the map keys plain Strings instead of GStrings.
    def series = correct ? actual : "$predicted/$actual".toString()
    petalL[series] << record.x.get('Petal length')
    petalW[series] << record.x.get('Petal width')
    def probsForClass = { klass -> prefix(klass) + sprintf('%5.3f', probs.get(klass)) }
    def probability = speciesLabels.collect(probsForClass)
    println "Record $key - Actual: $actual, Predicted: $predicted (probabilities: $probability)"
}
// Compute classification metrics from the test Dataframe (after predict has run) and report accuracy.
def metrics = new ClassificationMetrics(testing)
println "Classifier Accuracy: $metrics.accuracy"
// Remove the model that was saved earlier, then release every Dataframe created by this script.
classifier.delete()
// The source Dataframe `data` is closed too, not only the two subsets split from it.
// NOTE(review): assumes the subsets keep their own storage, so closing `data` last is safe - confirm.
[training, testing, data]*.close()
// Scatter plot of the test records, one series per "Predicted[/Actual]" group.
def chart = new XYChartBuilder().width(900).height(450).title("Species Predicted[/Actual]").
        xAxisTitle("Petal length").yAxisTitle("Petal width").build()
petalL.keySet().each { seriesName ->
    // addSeries takes (name, xData, yData): petal length goes on x and petal width on y
    // so the plotted data agrees with the axis titles set above.
    def series = chart.addSeries(seriesName as String,
            petalL[seriesName] as double[], petalW[seriesName] as double[])
    series.XYSeriesRenderStyle = Scatter
}
new SwingWrapper(chart).displayChart()