Skip to content

Commit

Permalink
Source code for updated clustering tutorial (dotnet#521)
Browse files Browse the repository at this point in the history
  • Loading branch information
pkulikov authored and Ron Petrusha committed Dec 17, 2018
1 parent b48e921 commit b4adcf7
Show file tree
Hide file tree
Showing 5 changed files with 170 additions and 0 deletions.
25 changes: 25 additions & 0 deletions machine-learning/tutorials/IrisFlowerClustering.sln
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio 15
VisualStudioVersion = 15.0.28307.168
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "IrisFlowerClustering", "IrisFlowerClustering\IrisFlowerClustering.csproj", "{B1297D9B-46A6-4A03-A04E-65B2FAC0B687}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Release|Any CPU = Release|Any CPU
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{B1297D9B-46A6-4A03-A04E-65B2FAC0B687}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{B1297D9B-46A6-4A03-A04E-65B2FAC0B687}.Debug|Any CPU.Build.0 = Debug|Any CPU
{B1297D9B-46A6-4A03-A04E-65B2FAC0B687}.Release|Any CPU.ActiveCfg = Release|Any CPU
{B1297D9B-46A6-4A03-A04E-65B2FAC0B687}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {30D318E3-A018-46F2-8D53-F72211D6BA70}
EndGlobalSection
EndGlobal
32 changes: 32 additions & 0 deletions machine-learning/tutorials/IrisFlowerClustering/IrisData.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// <SnippetUsings>
using Microsoft.ML.Runtime.Api;
// </SnippetUsings>

namespace IrisFlowerClustering
{
// <SnippetClassDefinitions>
/// <summary>
/// One row of iris measurements, held as four single-precision fields.
/// Each field's <c>Column</c> attribute carries the same index that Program.cs
/// assigns to the identically named column in its text loader setup.
/// </summary>
public class IrisData
{
// Index "0": paired with the "SepalLength" loader column in Program.cs.
[Column("0")]
public float SepalLength;

// Index "1": paired with the "SepalWidth" loader column in Program.cs.
[Column("1")]
public float SepalWidth;

// Index "2": paired with the "PetalLength" loader column in Program.cs.
[Column("2")]
public float PetalLength;

// Index "3": paired with the "PetalWidth" loader column in Program.cs.
[Column("3")]
public float PetalWidth;
}

/// <summary>
/// Output record requested from the trained model in Program.cs, where it is
/// the second type argument of <c>MakePredictionFunction</c>.
/// </summary>
public class ClusterPrediction
{
// Tied by attribute to the "PredictedLabel" column; Program.cs prints it after "Cluster:".
[ColumnName("PredictedLabel")]
public uint PredictedClusterId;

// Tied by attribute to the "Score" column; Program.cs prints the values after "Distances:".
// NOTE(review): presumably one distance per cluster centroid - confirm against the KMeans trainer docs.
[ColumnName("Score")]
public float[] Distances;
}
// </SnippetClassDefinitions>
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
<Project Sdk="Microsoft.NET.Sdk">

<!-- Build an executable (not a library) targeting .NET Core 2.2. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>netcoreapp2.2</TargetFramework>
</PropertyGroup>

<!-- Declares the Data folder as part of the project; Program.cs builds its file paths under "Data". -->
<ItemGroup>
<Folder Include="Data\" />
</ItemGroup>

<!-- Package that supplies the Microsoft.ML namespaces imported by Program.cs and IrisData.cs; pinned to 0.8.0. -->
<ItemGroup>
<PackageReference Include="Microsoft.ML" Version="0.8.0" />
</ItemGroup>

<!-- Copy the data set next to the built binaries, refreshing it only when the source file is newer. -->
<ItemGroup>
<None Update="Data\iris.data">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
</ItemGroup>

</Project>
74 changes: 74 additions & 0 deletions machine-learning/tutorials/IrisFlowerClustering/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// <SnippetUsingsForPaths>
using System;
using System.IO;
// </SnippetUsingsForPaths>

// <SnippetMLUsings>
using Microsoft.ML;
using Microsoft.ML.Runtime.Data;
// </SnippetMLUsings>

namespace IrisFlowerClustering
{
/// <summary>
/// Console entry point for the clustering tutorial: reads the iris data file,
/// fits a KMeans pipeline with three clusters, writes the fitted model to disk,
/// and prints the prediction for one hard-coded sample.
/// </summary>
internal class Program
{
    // <SnippetPaths>
    // Both files live in the "Data" folder under the current working directory.
    private static readonly string _dataDirectory = Path.Combine(Environment.CurrentDirectory, "Data");
    private static readonly string _dataPath = Path.Combine(_dataDirectory, "iris.data");
    private static readonly string _modelPath = Path.Combine(_dataDirectory, "IrisClusteringModel.zip");
    // </SnippetPaths>

    /// <summary>Runs the load / train / save / predict sequence once.</summary>
    /// <param name="args">Command-line arguments; not read.</param>
    private static void Main(string[] args)
    {
        // <SnippetCreateContext>
        // The context is created with an explicit seed of 0.
        MLContext mlContext = new MLContext(seed: 0);
        // </SnippetCreateContext>

        // <SnippetSetupTextLoader>
        // Comma-separated file without a header row; the first four values are read as R4 columns.
        TextLoader.Column[] columns =
        {
            new TextLoader.Column("SepalLength", DataKind.R4, 0),
            new TextLoader.Column("SepalWidth", DataKind.R4, 1),
            new TextLoader.Column("PetalLength", DataKind.R4, 2),
            new TextLoader.Column("PetalWidth", DataKind.R4, 3)
        };
        var loaderArguments = new TextLoader.Arguments
        {
            Separator = ",",
            HasHeader = false,
            Column = columns
        };
        TextLoader textLoader = mlContext.Data.TextReader(loaderArguments);
        // </SnippetSetupTextLoader>

        // <SnippetCreateDataView>
        IDataView dataView = textLoader.Read(_dataPath);
        // </SnippetCreateDataView>

        // <SnippetCreatePipeline>
        // Stage 1 concatenates the four measurement columns into "Features";
        // stage 2 is the KMeans trainer reading that column with clustersCount 3.
        const string featuresColumnName = "Features";
        var featureConcatenation = mlContext.Transforms.Concatenate(
            featuresColumnName, "SepalLength", "SepalWidth", "PetalLength", "PetalWidth");
        var kMeansTrainer = mlContext.Clustering.Trainers.KMeans(featuresColumnName, clustersCount: 3);
        var pipeline = featureConcatenation.Append(kMeansTrainer);
        // </SnippetCreatePipeline>

        // <SnippetTrainModel>
        var model = pipeline.Fit(dataView);
        // </SnippetTrainModel>

        // <SnippetSaveModel>
        // FileMode.Create replaces any model file left over from an earlier run.
        using (FileStream modelStream = new FileStream(_modelPath, FileMode.Create, FileAccess.Write, FileShare.Write))
        {
            mlContext.Model.Save(model, modelStream);
        }
        // </SnippetSaveModel>

        // <SnippetPredictor>
        var predictionFunction = model.MakePredictionFunction<IrisData, ClusterPrediction>(mlContext);
        // </SnippetPredictor>

        // <SnippetPredictionExample>
        ClusterPrediction prediction = predictionFunction.Predict(TestIrisData.Setosa);
        Console.WriteLine($"Cluster: {prediction.PredictedClusterId}");
        string distances = string.Join(" ", prediction.Distances);
        Console.WriteLine($"Distances: {distances}");
        // </SnippetPredictionExample>
    }
}
}
17 changes: 17 additions & 0 deletions machine-learning/tutorials/IrisFlowerClustering/TestIrisData.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
namespace IrisFlowerClustering
{
// <SnippetStatic>
/// <summary>
/// Hand-written <see cref="IrisData"/> samples; Program.cs passes
/// <see cref="Setosa"/> to the trained model to obtain a prediction.
/// </summary>
internal static class TestIrisData
// </SnippetStatic>
{
    // <SnippetTestData>
    /// <summary>
    /// A single fixed measurement set (presumably an Iris setosa specimen, going by the name).
    /// </summary>
    internal static readonly IrisData Setosa =
        new IrisData { SepalLength = 5.1f, SepalWidth = 3.5f, PetalLength = 1.4f, PetalWidth = 0.2f };
    // </SnippetTestData>
}
}

0 comments on commit b4adcf7

Please sign in to comment.