-
Notifications
You must be signed in to change notification settings - Fork 1.9k
/
Copy pathColumnInference.cs
137 lines (122 loc) · 5.97 KB
/
ColumnInference.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
using System.Collections.Generic;
using System.Collections.ObjectModel;
using Microsoft.ML.Data;
using Newtonsoft.Json;
namespace Microsoft.ML.AutoML
{
/// <summary>
/// Holds what AutoML inferred about the columns of a dataset.
/// </summary>
public sealed class ColumnInferenceResults
{
    /// <summary>
    /// Gets the <see cref="TextLoader.Options" /> that were inferred for the dataset.
    /// </summary>
    /// <remarks>
    /// These options can be handed to a new <see cref="TextLoader" /> in order to
    /// load the data into an <see cref="IDataView" />.
    /// </remarks>
    [JsonProperty(DefaultValueHandling = DefaultValueHandling.Include)]
    public TextLoader.Options TextLoaderOptions { get; internal set; }

    /// <summary>
    /// Gets the column information that was inferred for the dataset.
    /// </summary>
    /// <remarks>
    /// <para>Holds the purpose inferred for each column. See <see cref="AutoML.ColumnInformation"/> for more details.</para>
    /// <para>The value can be passed to the AutoML API when running an experiment.
    /// See <see cref="ExperimentBase{TMetrics, TExperimentSettings}.Execute(IDataView, ColumnInformation, IEstimator{ITransformer}, System.IProgress{RunDetail{TMetrics}})" />, for example.</para>
    /// </remarks>
    [JsonProperty(DefaultValueHandling = DefaultValueHandling.Include)]
    public ColumnInformation ColumnInformation { get; internal set; }
}
/// <summary>
/// Describes the columns of a dataset.
/// </summary>
/// <remarks>
/// <para>Records the purpose of each column in the dataset: for instance, which
/// columns AutoML should treat as categorical, which columns AutoML should ignore,
/// which column is the label, etc.</para>
/// <para>A <see cref="ColumnInformation"/> can be passed to the AutoML API when running an experiment.
/// See <see cref="ExperimentBase{TMetrics, TExperimentSettings}.Execute(IDataView, ColumnInformation, IEstimator{ITransformer}, System.IProgress{RunDetail{TMetrics}})" />, for example.</para>
/// </remarks>
public sealed class ColumnInformation
{
    /// <summary>
    /// Gets or sets the name of the dataset column to use as the label.
    /// </summary>
    /// <value>The default value is "Label".</value>
    public string LabelColumnName { get; set; } = DefaultColumnNames.Label;

    /// <summary>
    /// Gets or sets the name of the dataset column to use as a user ID for computation.
    /// </summary>
    public string UserIdColumnName { get; set; }

    /// <summary>
    /// Gets or sets the name of the dataset column to use as a group ID for computation in a Ranking Task.
    /// If a <see cref="SamplingKeyColumnName"/> is provided, then it should be the same as this column.
    /// </summary>
    public string GroupIdColumnName { get; set; }

    /// <summary>
    /// Gets or sets the name of the dataset column to use as an item ID for computation.
    /// </summary>
    public string ItemIdColumnName { get; set; }

    /// <summary>
    /// Gets or sets the name of the dataset column to use for example weight.
    /// </summary>
    public string ExampleWeightColumnName { get; set; }

    /// <summary>
    /// Gets or sets the name of the dataset column to use for grouping rows.
    /// </summary>
    /// <remarks>
    /// If two examples share the same sampling key,
    /// they are guaranteed to appear in the same subset (train or test).
    /// This can be used to ensure no label leakage from the train to the test set.
    /// If <see langword="null"/>, no row grouping will be performed.
    /// </remarks>
    public string SamplingKeyColumnName { get; set; }

    /// <summary>
    /// Gets the names of the dataset columns that are categorical.
    /// </summary>
    /// <value>The default value is a new, empty <see cref="Collection{String}"/>.</value>
    /// <remarks>
    /// Categorical data columns should generally be columns that contain a small number of unique values.
    /// </remarks>
    [JsonProperty]
    public ICollection<string> CategoricalColumnNames { get; private set; } = new Collection<string>();

    /// <summary>
    /// Gets the names of the dataset columns that are numeric.
    /// </summary>
    /// <value>The default value is a new, empty <see cref="Collection{String}"/>.</value>
    [JsonProperty]
    public ICollection<string> NumericColumnNames { get; private set; } = new Collection<string>();

    /// <summary>
    /// Gets the names of the dataset columns that are text.
    /// </summary>
    /// <value>The default value is a new, empty <see cref="Collection{String}"/>.</value>
    [JsonProperty]
    public ICollection<string> TextColumnNames { get; private set; } = new Collection<string>();

    /// <summary>
    /// Gets the names of the dataset columns that AutoML should ignore.
    /// </summary>
    /// <value>The default value is a new, empty <see cref="Collection{String}"/>.</value>
    [JsonProperty]
    public ICollection<string> IgnoredColumnNames { get; private set; } = new Collection<string>();

    /// <summary>
    /// Gets the names of the dataset columns that are image paths.
    /// </summary>
    /// <value>The default value is a new, empty <see cref="Collection{String}"/>.</value>
    [JsonProperty]
    public ICollection<string> ImagePathColumnNames { get; private set; } = new Collection<string>();

    /// <summary>
    /// Initializes a new instance of the <see cref="ColumnInformation"/> class.
    /// </summary>
    /// <remarks>
    /// The label column name starts out as the default label name and every
    /// column-name collection starts out empty; all other column names are left
    /// <see langword="null"/>.
    /// </remarks>
    public ColumnInformation()
    {
        // Defaults are supplied by the property initializers above; the
        // constructor is declared explicitly to keep it part of the public API.
    }
}
}