Skip to content

Commit 23c45aa

Browse files
Add serializable pipeline && titanic example for sweepable api (dotnet#6108)
1 parent f34b24e commit 23c45aa

23 files changed

+2592
-16
lines changed

Diff for: docs/samples/Microsoft.ML.AutoML.Samples/Microsoft.ML.AutoML.Samples.csproj

+4-8
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,13 @@
77
</PropertyGroup>
88

99
<ItemGroup>
10+
<ProjectReference Include="..\..\..\src\Microsoft.Data.Analysis\Microsoft.Data.Analysis.csproj" />
1011
<ProjectReference Include="..\..\..\src\Microsoft.ML.AutoML\Microsoft.ML.AutoML.csproj" />
11-
<ProjectReference Include="..\..\..\src\Microsoft.ML.Core\Microsoft.ML.Core.csproj" >
12-
<PrivateAssets>all</PrivateAssets>
13-
</ProjectReference>
14-
15-
<ProjectReference Include="..\..\..\src\Microsoft.ML.Transforms\Microsoft.ML.Transforms.csproj" >
16-
<PrivateAssets>all</PrivateAssets>
17-
</ProjectReference>
18-
12+
<ProjectReference Include="..\..\..\src\Microsoft.ML.Core\Microsoft.ML.Core.csproj" />
13+
<ProjectReference Include="..\..\..\src\Microsoft.ML.Transforms\Microsoft.ML.Transforms.csproj" />
1914
<NativeAssemblyReference Include="MatrixFactorizationNative" />
2015
<NativeAssemblyReference Include="FastTreeNative" />
16+
<PackageReference Include="System.Text.Json" Version="$(SystemTextJsonVersion)" />
2117
</ItemGroup>
2218
<ItemGroup>
2319
<PackageReference Include="SciSharp.TensorFlow.Redist" Version="$(TensorFlowVersion)" />

Diff for: src/Microsoft.ML.AutoML/API/AutoCatalog.cs

+11
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
// The .NET Foundation licenses this file to you under the MIT license.
33
// See the LICENSE file in the project root for more information.
44

5+
using System;
56
using Microsoft.ML.Data;
7+
using Microsoft.ML.SearchSpace;
68

79
namespace Microsoft.ML.AutoML
810
{
@@ -278,5 +280,14 @@ public ColumnInferenceResults InferColumns(string path, uint labelColumnIndex, b
278280
UserInputValidationUtil.ValidateInferColumnsArgs(path);
279281
return ColumnInferenceApi.InferColumns(_context, path, labelColumnIndex, hasHeader, separatorChar, allowQuoting, allowSparse, trimWhitespace, groupColumns);
280282
}
283+
284+
/// <summary>
/// Creates a <see cref="SweepableEstimator"/> from a strongly typed factory and an optional search space.
/// </summary>
/// <typeparam name="T">
/// Option type that the swept <see cref="Parameter"/> is converted to (via <c>AsType&lt;T&gt;()</c>)
/// before <paramref name="factory"/> is invoked.
/// </typeparam>
/// <param name="factory">
/// Callback that receives the <see cref="MLContext"/> and the typed option object and returns the estimator to use.
/// </param>
/// <param name="ss">
/// Search space forwarded unchanged to the <see cref="SweepableEstimator"/> constructor; defaults to <see langword="null"/>.
/// </param>
/// <returns>A new <see cref="SweepableEstimator"/> wrapping <paramref name="factory"/>.</returns>
internal SweepableEstimator CreateSweepableEstimator<T>(Func<MLContext, T, IEstimator<ITransformer>> factory, SearchSpace<T> ss = null)
    where T : class, new()
{
    return new SweepableEstimator(
        (MLContext mlContext, Parameter sweptParameter) =>
        {
            // Adapt the untyped parameter to the caller's typed option object
            // before handing control to the user-supplied factory.
            T typedOption = sweptParameter.AsType<T>();
            return factory(mlContext, typedOption);
        },
        ss);
}
281292
}
282293
}

Diff for: src/Microsoft.ML.AutoML/API/SweepableExtension.cs

+33
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
using System;
6+
using System.Collections.Generic;
7+
using System.Text;
8+
9+
namespace Microsoft.ML.AutoML
10+
{
11+
/// <summary>
/// Extension methods for chaining <see cref="IEstimator{ITransformer}"/> and
/// <see cref="SweepableEstimator"/> instances into a <see cref="SweepableEstimatorPipeline"/>.
/// </summary>
internal static class SweepableExtension
{
    /// <summary>
    /// Starts a new pipeline with <paramref name="estimator"/> followed by <paramref name="estimator1"/>.
    /// </summary>
    /// <param name="estimator">The estimator placed first in the pipeline.</param>
    /// <param name="estimator1">The sweepable estimator appended after it.</param>
    /// <returns>A new <see cref="SweepableEstimatorPipeline"/> containing both estimators, in order.</returns>
    public static SweepableEstimatorPipeline Append(this IEstimator<ITransformer> estimator, SweepableEstimator estimator1)
    {
        return new SweepableEstimatorPipeline().Append(estimator).Append(estimator1);
    }

    /// <summary>
    /// Appends a plain estimator to <paramref name="pipeline"/> by wrapping it in a
    /// <see cref="SweepableEstimator"/> whose factory always returns <paramref name="estimator1"/>
    /// and whose search space is a freshly constructed <see cref="SearchSpace.SearchSpace"/>.
    /// </summary>
    /// <param name="pipeline">The pipeline to append to.</param>
    /// <param name="estimator1">The estimator to wrap and append.</param>
    /// <returns>The result of appending the wrapped estimator to <paramref name="pipeline"/>.</returns>
    public static SweepableEstimatorPipeline Append(this SweepableEstimatorPipeline pipeline, IEstimator<ITransformer> estimator1)
    {
        // The factory ignores its context/parameter arguments: there is nothing to sweep for a fixed estimator.
        return pipeline.Append(new SweepableEstimator((context, parameter) => estimator1, new SearchSpace.SearchSpace()));
    }

    /// <summary>
    /// Starts a new pipeline with <paramref name="estimator"/> followed by <paramref name="estimator1"/>.
    /// </summary>
    /// <param name="estimator">The sweepable estimator placed first in the pipeline.</param>
    /// <param name="estimator1">The sweepable estimator appended after it.</param>
    /// <returns>A new <see cref="SweepableEstimatorPipeline"/> containing both estimators, in order.</returns>
    public static SweepableEstimatorPipeline Append(this SweepableEstimator estimator, SweepableEstimator estimator1)
    {
        return new SweepableEstimatorPipeline().Append(estimator).Append(estimator1);
    }

    /// <summary>
    /// Starts a new pipeline with <paramref name="estimator"/> followed by the plain estimator
    /// <paramref name="estimator1"/>.
    /// </summary>
    /// <param name="estimator">The sweepable estimator placed first in the pipeline.</param>
    /// <param name="estimator1">The estimator appended after it.</param>
    /// <returns>A new <see cref="SweepableEstimatorPipeline"/> containing both estimators, in order.</returns>
    public static SweepableEstimatorPipeline Append(this SweepableEstimator estimator, IEstimator<ITransformer> estimator1)
    {
        // Build the pipeline explicitly, like the sibling overloads. Calling
        // estimator.Append(estimator1) here would bind back to this very overload
        // (no other overload in this class accepts these argument types) and never terminate.
        return new SweepableEstimatorPipeline().Append(estimator).Append(estimator1);
    }
}
33+
}

Diff for: src/Microsoft.ML.AutoML/CodeGen/code_gen_flag.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"EstimatorFactoryGenerator": false,
33
"CodeGenCatalogGenerator": false,
4-
"EstimatorTypeGenerator": false,
4+
"EstimatorTypeGenerator": true,
55
"SearchSpaceGenerator": true,
66
"SweepableEstimatorGenerator": false
77
}
+211
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
{
2+
"$schema": "http://json-schema.org/draft-04/schema",
3+
"title": "estimator schema",
4+
5+
"type": "object",
6+
"properties": {
7+
"estimators": {
8+
"type": "array",
9+
"items": {
10+
"type": "object",
11+
"properties": {
12+
"estimatorTypes": {
13+
"type": "array",
14+
"items": {
15+
"type": "string",
16+
"enum": [
17+
"OneVersusAll",
18+
"BinaryClassification",
19+
"Regression",
20+
"MultiClassification",
21+
"Ranking",
22+
"Recommendation",
23+
"Transforms",
24+
"Categorical",
25+
"Conversion",
26+
"Text",
27+
"Calibrators",
28+
"Forecasting"
29+
]
30+
}
31+
},
32+
"functionName": {
33+
"type": "string",
34+
"description": "estimator's function name.",
35+
"enum": [
36+
"LightGbm",
37+
"FastTree",
38+
"FastForest",
39+
"FastTreeTweedie",
40+
"AveragedPerceptron",
41+
"LinearSvm",
42+
"LbfgsLogisticRegression",
43+
"LbfgsMaximumEntropy",
44+
"OnlineGradientDescent",
45+
"Ols",
46+
"LbfgsPoissonRegression",
47+
"SdcaLogisticRegression",
48+
"SdcaMaximumEntropy",
49+
"Sdca",
50+
"SgdCalibrated",
51+
"SymbolicSgdLogisticRegression",
52+
"MatrixFactorization",
53+
"ImageClassification",
54+
"Concatenate",
55+
"OneHotEncoding",
56+
"OneHotHashEncoding",
57+
"LoadRawImageBytes",
58+
"CopyColumns",
59+
"Hash",
60+
"MapKeyToValue",
61+
"IndicateMissingValues",
62+
"ReplaceMissingValues",
63+
"NormalizeMinMax",
64+
"LoadImages",
65+
"FeaturizeText",
66+
"ConvertType",
67+
"MapValueToKey",
68+
"ApplyOnnxModel",
69+
"ResizeImages",
70+
"ExtractPixels",
71+
"Naive",
72+
"ForecastBySsa"
73+
]
74+
},
75+
"nugetDependencies": {
76+
"type": "array",
77+
"description": "nuget dependencies",
78+
"minItems": 0,
79+
"items": {
80+
"type": "string",
81+
"enum": [
82+
"Microsoft.ML",
83+
"Microsoft.ML.LightGbm",
84+
"Microsoft.ML.FastTree",
85+
"Microsoft.ML.Mkl.Components",
86+
"Microsoft.ML.Recommender",
87+
"Microsoft.ML.Vision",
88+
"Microsoft.ML.ImageAnalytics",
89+
"SciSharp.TensorFlow.Redist",
90+
"Microsoft.ML.OnnxTransformer",
91+
"Microsoft.ML.OnnxRuntime",
92+
"Microsoft.ML.TimeSeries"
93+
]
94+
}
95+
},
96+
"usingStatements": {
97+
"type": "array",
98+
"description": "using statements",
99+
"minItems": 0,
100+
"items": {
101+
"type": "string",
102+
"enum": [
103+
"Microsoft.ML",
104+
"Microsoft.ML.Data",
105+
"Microsoft.ML.Trainers",
106+
"Microsoft.ML.Vision",
107+
"Microsoft.ML.Transforms.Image",
108+
"Microsoft.ML.Trainers.FastTree",
109+
"Microsoft.ML.Trainers.LightGbm"
110+
]
111+
}
112+
},
113+
"arguments": {
114+
"type": "array",
115+
"description": "arguments list",
116+
"minItems": 0,
117+
"items": {
118+
"type": "object",
119+
"properties": {
120+
"argumentName": {
121+
"type": "string",
122+
"enum": [
123+
"labelColumnName",
124+
"featureColumnName",
125+
"exampleWeightColumnName",
126+
"rowGroupColumnName",
127+
"numberOfLeaves",
128+
"minimumExampleCountPerLeaf",
129+
"learningRate",
130+
"numberOfIterations",
131+
"numberOfTrees",
132+
"lossFunction",
133+
"decreaseLearningRate",
134+
"l1Regularization",
135+
"l2Regularization",
136+
"optimizationTolerance",
137+
"historySize",
138+
"enforceNonNegativity",
139+
"maximumNumberOfIterations",
140+
"matrixColumnIndexColumnName",
141+
"matrixRowIndexColumnName",
142+
"approximationRank",
143+
"scoreColumnName",
144+
"predictedLabelColumnName",
145+
"validationSet",
146+
"inputColumnNames",
147+
"outputColumnName",
148+
"inputColumnName",
149+
"outputKind",
150+
"maximumNumberOfKeys",
151+
"keyOrdinality",
152+
"keyData",
153+
"imageFolder",
154+
"maximumNumberOfInverts",
155+
"numberOfBits",
156+
"maximumExampleCount",
157+
"fixZero",
158+
"modelFile",
159+
"gpuDeviceId",
160+
"fallbackToCpu",
161+
"imageWidth",
162+
"colorsToExtract",
163+
"orderOfExtraction",
164+
"imageHeight",
165+
"cropAnchor",
166+
"resizing",
167+
"featureFraction",
168+
"maximumBinCountPerFeature",
169+
"subsampleFraction",
170+
"windowSize",
171+
"seriesLength",
172+
"trainSize",
173+
"horizon",
174+
"isAdaptive",
175+
"discountFactor",
176+
"rank",
177+
"maxRank",
178+
"shouldStabilize",
179+
"shouldMaintainInfo",
180+
"confidenceLowerBoundColumn",
181+
"confidenceUpperBoundColumn",
182+
"confidenceLevel",
183+
"variableHorizon"
184+
]
185+
},
186+
"argumentType": {
187+
"type": "string",
188+
"enum": [
189+
"integer",
190+
"float",
191+
"double",
192+
"string",
193+
"boolean",
194+
"resizingKind",
195+
"colorBits",
196+
"colorsOrder",
197+
"anchor"
198+
]
199+
}
200+
}
201+
}
202+
},
203+
"searchOption": {
204+
"$ref": "search-space-schema.json#/definitions/search_space_name"
205+
}
206+
},
207+
"required": [ "estimatorTypes", "functionName" ]
208+
}
209+
}
210+
}
211+
}

0 commit comments

Comments
 (0)