Skip to content

Commit 1fc5296

Browse files
[release/4.0] Fix up docs for MLContext (#7363)
* fix up docs for MLContext * some more fixes * text class and sentence similarity trainers --------- Co-authored-by: Genevieve Warren <[email protected]>
1 parent b489340 commit 1fc5296

File tree

5 files changed

+70
-61
lines changed

5 files changed

+70
-61
lines changed

src/Microsoft.ML.AutoML/API/ColumnInference.cs

+20-20
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ namespace Microsoft.ML.AutoML
1515
public sealed class ColumnInferenceResults
1616
{
1717
/// <summary>
18-
/// Inferred <see cref="TextLoader.Options" /> for the dataset.
18+
/// Gets the inferred <see cref="TextLoader.Options" /> for the dataset.
1919
/// </summary>
2020
/// <remarks>
2121
/// Can be used to instantiate a new <see cref="TextLoader" /> to load
@@ -25,69 +25,69 @@ public sealed class ColumnInferenceResults
2525
public TextLoader.Options TextLoaderOptions { get; internal set; }
2626

2727
/// <summary>
28-
/// Information about the inferred columns in the dataset.
28+
/// Gets information about the inferred columns in the dataset.
2929
/// </summary>
3030
/// <remarks>
3131
/// <para>Contains the inferred purposes of each column. See <see cref="AutoML.ColumnInformation"/> for more details.</para>
32-
/// <para>This can be fed to the AutoML API when running an experiment.
33-
/// See <typeref cref="ExperimentBase{TMetrics, TExperimentSettings}.Execute(IDataView, ColumnInformation, IEstimator{ITransformer}, System.IProgress{RunDetail{TMetrics}})" />
34-
/// for example.</para>
32+
/// <para>This value can be fed to the AutoML API when running an experiment.
33+
/// See <see cref="ExperimentBase{TMetrics, TExperimentSettings}.Execute(IDataView, ColumnInformation, IEstimator{ITransformer}, System.IProgress{RunDetail{TMetrics}})" />, for example.</para>
3534
/// </remarks>
3635
[JsonProperty(DefaultValueHandling = DefaultValueHandling.Include)]
3736
public ColumnInformation ColumnInformation { get; internal set; }
3837
}
3938

4039
/// <summary>
41-
/// Information about the columns in a dataset.
40+
/// Provides information about the columns in a dataset.
4241
/// </summary>
4342
/// <remarks>
4443
/// <para>Contains information about the purpose of each column in the dataset. For instance,
4544
/// it enumerates the dataset columns that AutoML should treat as categorical,
4645
/// the columns AutoML should ignore, which column is the label, etc.</para>
4746
/// <para><see cref="ColumnInformation"/> can be fed to the AutoML API when running an experiment.
48-
/// See <typeref cref="ExperimentBase{TMetrics, TExperimentSettings}.Execute(IDataView, ColumnInformation, IEstimator{ITransformer}, System.IProgress{RunDetail{TMetrics}})" />
49-
/// for example.</para>
47+
/// See <see cref="ExperimentBase{TMetrics, TExperimentSettings}.Execute(IDataView, ColumnInformation, IEstimator{ITransformer}, System.IProgress{RunDetail{TMetrics}})" />, for example.</para>
5048
/// </remarks>
5149
public sealed class ColumnInformation
5250
{
5351
/// <summary>
54-
/// The dataset column to use as the label.
52+
/// Gets or sets the dataset column to use as the label.
5553
/// </summary>
5654
/// <value>The default value is "Label".</value>
5755
public string LabelColumnName { get; set; }
5856

5957
/// <summary>
60-
/// The dataset column to use as a user ID for computation.
58+
/// Gets or sets the dataset column to use as a user ID for computation.
6159
/// </summary>
6260
public string UserIdColumnName { get; set; }
6361

6462
/// <summary>
65-
/// The dataset column to use as a group ID for computation in a Ranking Task.
63+
/// Gets or sets the dataset column to use as a group ID for computation in a Ranking Task.
6664
/// If a SamplingKeyColumnName is provided, then it should be the same as this column.
6765
/// </summary>
6866
public string GroupIdColumnName { get; set; }
6967

7068
/// <summary>
71-
/// The dataset column to use as a item ID for computation.
69+
/// Gets or sets the dataset column to use as an item ID for computation.
7270
/// </summary>
7371
public string ItemIdColumnName { get; set; }
7472

7573
/// <summary>
76-
/// The dataset column to use for example weight.
74+
/// Gets or sets the dataset column to use for example weight.
7775
/// </summary>
7876
public string ExampleWeightColumnName { get; set; }
7977

8078
/// <summary>
81-
/// The dataset column to use for grouping rows.
79+
/// Gets or sets the dataset column to use for grouping rows.
80+
/// </summary>
81+
/// <remarks>
8282
/// If two examples share the same sampling key column name,
8383
/// they are guaranteed to appear in the same subset (train or test).
8484
/// This can be used to ensure no label leakage from the train to the test set.
8585
/// If <see langword="null"/>, no row grouping will be performed.
86-
/// </summary>
86+
/// </remarks>
8787
public string SamplingKeyColumnName { get; set; }
8888

8989
/// <summary>
90-
/// The dataset columns that are categorical.
90+
/// Gets the dataset columns that are categorical.
9191
/// </summary>
9292
/// <value>The default value is a new, empty <see cref="Collection{String}"/>.</value>
9393
/// <remarks>
@@ -97,28 +97,28 @@ public sealed class ColumnInformation
9797
public ICollection<string> CategoricalColumnNames { get; private set; }
9898

9999
/// <summary>
100-
/// The dataset columns that are numeric.
100+
/// Gets the dataset columns that are numeric.
101101
/// </summary>
102102
/// <value>The default value is a new, empty <see cref="Collection{String}"/>.</value>
103103
[JsonProperty]
104104
public ICollection<string> NumericColumnNames { get; private set; }
105105

106106
/// <summary>
107-
/// The dataset columns that are text.
107+
/// Gets the dataset columns that are text.
108108
/// </summary>
109109
/// <value>The default value is a new, empty <see cref="Collection{String}"/>.</value>
110110
[JsonProperty]
111111
public ICollection<string> TextColumnNames { get; private set; }
112112

113113
/// <summary>
114-
/// The dataset columns that AutoML should ignore.
114+
/// Gets the dataset columns that AutoML should ignore.
115115
/// </summary>
116116
/// <value>The default value is a new, empty <see cref="Collection{String}"/>.</value>
117117
[JsonProperty]
118118
public ICollection<string> IgnoredColumnNames { get; private set; }
119119

120120
/// <summary>
121-
/// The dataset columns that are image paths.
121+
/// Gets the dataset columns that are image paths.
122122
/// </summary>
123123
/// <value>The default value is a new, empty <see cref="Collection{String}"/>.</value>
124124
[JsonProperty]

src/Microsoft.ML.AutoML/ColumnInference/ColumnInformationUtil.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ public static ColumnInformation BuildColumnInfo(IEnumerable<DatasetColumnInfo> c
122122
}
123123

124124
/// <summary>
125-
/// Get all column names that are in <paramref name="columnInformation"/>.
125+
/// Gets all column names that are in <paramref name="columnInformation"/>.
126126
/// </summary>
127127
/// <param name="columnInformation">Column information.</param>
128128
public static IEnumerable<string> GetColumnNames(ColumnInformation columnInformation)

src/Microsoft.ML.Data/MLContext.cs

+27-20
Original file line numberDiff line numberDiff line change
@@ -11,72 +11,78 @@
1111
namespace Microsoft.ML
1212
{
1313
/// <summary>
14-
/// The common context for all ML.NET operations. Once instantiated by the user, it provides a way to
14+
/// Represents the common context for all ML.NET operations.
15+
/// </summary>
16+
/// <remarks>
17+
/// Once instantiated by the user, this class provides a way to
1518
/// create components for data preparation, feature engineering, training, prediction, and model evaluation.
1619
/// It also allows logging, execution control, and the ability to set repeatable random numbers.
17-
/// </summary>
20+
/// </remarks>
1821
public sealed class MLContext : IHostEnvironmentInternal
1922
{
2023
// REVIEW: consider making LocalEnvironment and MLContext the same class instead of encapsulation.
2124
private readonly LocalEnvironment _env;
2225

2326
/// <summary>
24-
/// Trainers and tasks specific to binary classification problems.
27+
/// Gets the trainers and tasks specific to binary classification problems.
2528
/// </summary>
2629
public BinaryClassificationCatalog BinaryClassification { get; }
30+
2731
/// <summary>
28-
/// Trainers and tasks specific to multiclass classification problems.
32+
/// Gets the trainers and tasks specific to multiclass classification problems.
2933
/// </summary>
3034
public MulticlassClassificationCatalog MulticlassClassification { get; }
35+
3136
/// <summary>
32-
/// Trainers and tasks specific to regression problems.
37+
/// Gets the trainers and tasks specific to regression problems.
3338
/// </summary>
3439
public RegressionCatalog Regression { get; }
40+
3541
/// <summary>
36-
/// Trainers and tasks specific to clustering problems.
42+
/// Gets the trainers and tasks specific to clustering problems.
3743
/// </summary>
3844
public ClusteringCatalog Clustering { get; }
3945

4046
/// <summary>
41-
/// Trainers and tasks specific to ranking problems.
47+
/// Gets the trainers and tasks specific to ranking problems.
4248
/// </summary>
4349
public RankingCatalog Ranking { get; }
4450

4551
/// <summary>
46-
/// Trainers and tasks specific to anomaly detection problems.
52+
/// Gets the trainers and tasks specific to anomaly detection problems.
4753
/// </summary>
4854
public AnomalyDetectionCatalog AnomalyDetection { get; }
4955

5056
/// <summary>
51-
/// Trainers and tasks specific to forecasting problems.
57+
/// Gets the trainers and tasks specific to forecasting problems.
5258
/// </summary>
5359
public ForecastingCatalog Forecasting { get; }
5460

5561
/// <summary>
56-
/// Data processing operations.
62+
/// Gets the data processing operations.
5763
/// </summary>
5864
public TransformsCatalog Transforms { get; }
5965

6066
/// <summary>
61-
/// Operations with trained models.
67+
/// Gets the operations with trained models.
6268
/// </summary>
6369
public ModelOperationsCatalog Model { get; }
6470

6571
/// <summary>
66-
/// Data loading and saving.
72+
/// Gets the data loading and saving operations.
6773
/// </summary>
6874
public DataOperationsCatalog Data { get; }
6975

7076
// REVIEW: I think it's valuable to have the simplest possible interface for logging interception here,
7177
// and expand if and when necessary. Exposing classes like ChannelMessage, MessageSensitivity and so on
7278
// looks premature at this point.
7379
/// <summary>
74-
/// The handler for the log messages.
80+
/// Represents the callback method that will handle the log messages.
7581
/// </summary>
7682
public event EventHandler<LoggingEventArgs> Log;
7783

7884
/// <summary>
79-
/// This is a catalog of components that will be used for model loading.
85+
/// Gets the catalog of components that will be used for model loading.
8086
/// </summary>
8187
public ComponentCatalog ComponentCatalog => _env.ComponentCatalog;
8288

@@ -90,7 +96,8 @@ public string TempFilePath
9096
}
9197

9298
/// <summary>
93-
/// Allow falling back to run on CPU if couldn't run on GPU.
99+
/// Gets or sets a value that indicates whether the CPU will
100+
/// be used if the task couldn't run on GPU.
94101
/// </summary>
95102
public bool FallbackToCpu
96103
{
@@ -99,7 +106,7 @@ public bool FallbackToCpu
99106
}
100107

101108
/// <summary>
102-
/// GPU device ID to run execution on, <see langword="null" /> to run on CPU.
109+
/// Gets or sets the GPU device ID to run execution on, <see langword="null" /> to run on CPU.
103110
/// </summary>
104111
public int? GpuDeviceId
105112
{
@@ -120,17 +127,17 @@ public int? GpuDeviceId
120127
///
121128
/// If a fixed seed is provided by <paramref name="seed"/>, MLContext environment becomes
122129
/// deterministic, meaning that the results are repeatable and will remain the same across multiple runs.
123-
/// For instance in many of ML.NET's API reference example code snippets, a seed is provided.
130+
/// For instance, in many of ML.NET's API reference example code snippets, a seed is provided.
124131
/// That's because we want the users to get the same output as what's included in example comments,
125132
/// when they run the example on their own machine.
126133
///
127134
/// Generally though, repeatability is not a requirement and that's the default behavior.
128-
/// If a seed is not provided by <paramref name="seed"/>, i.e. it's set to <see langword="null"/>,
135+
/// If a seed is not provided by <paramref name="seed"/>, that is, it's set to <see langword="null"/>,
129136
/// MLContext environment becomes non-deterministic and outputs change across multiple runs.
130137
///
131138
/// There are many operations in ML.NET that don't use any randomness, such as
132-
/// min-max normalization, concatenating columns, missing value indication, etc.
133-
/// The behavior of those operations are deterministic regardless of the seed value.
139+
/// min-max normalization, concatenating columns, and missing value indication.
140+
/// The behavior of those operations is deterministic regardless of the seed value.
134141
///
135142
/// Also, ML.NET trainers don't use randomness *after* the training is finished.
136143
/// So, the predictions from a loaded model don't depend on the seed value.

src/Microsoft.ML.TorchSharp/NasBert/SentenceSimilarityTrainer.cs

+11-10
Original file line numberDiff line numberDiff line change
@@ -27,31 +27,32 @@
2727
namespace Microsoft.ML.TorchSharp.NasBert
2828
{
2929
/// <summary>
30-
/// The <see cref="IEstimator{TTransformer}"/> for training a Deep Neural Network(DNN) to classify text.
30+
/// Represents the <see cref="IEstimator{TTransformer}"/> for training a Deep Neural Network (DNN) to determine sentence similarity.
3131
/// </summary>
3232
/// <remarks>
3333
/// <format type="text/markdown"><![CDATA[
3434
/// To create this trainer, use [TextClassification](xref:Microsoft.ML.TorchSharpCatalog.TextClassification(Microsoft.ML.MulticlassClassificationCatalog.MulticlassClassificationTrainers,Int32,System.String,System.String,System.String,System.String,Int32,Int32,Int32,Microsoft.ML.TorchSharp.NasBert.BertArchitecture,Microsoft.ML.IDataView)).
3535
///
36-
/// ### Input and Output Columns
37-
/// The input label column data must be type<xref:System.Single> type and the sentence columns must be of type<xref:Microsoft.ML.Data.TextDataViewType>.
36+
/// ### Input and output columns
37+
/// The input label column data must be of type <xref:System.Single> and the sentence columns must be of type <xref:Microsoft.ML.Data.TextDataViewType>.
3838
///
3939
/// This trainer outputs the following columns:
4040
///
41-
/// | Output Column Name | Column Type | Description|
41+
/// | Output column name | Column type | Description|
4242
/// | -- | -- | -- |
43-
/// | `Score` | <xref:System.Single> | The degree of similarity between the 2 sentences. |
44-
/// ### Trainer Characteristics
45-
/// | | |
43+
/// | `Score` | <xref:System.Single> | The degree of similarity between the two sentences. |
44+
///
45+
/// ### Trainer characteristics
46+
/// | Characteristic | Value |
4647
/// | -- | -- |
47-
/// | Machine learning task | Rregression |
48+
/// | Machine learning task | Regression |
4849
/// | Is normalization required? | No |
4950
/// | Is caching required? | No |
5051
/// | Required NuGet in addition to Microsoft.ML | Microsoft.ML.TorchSharp and libtorch-cpu or libtorch-cuda-11.3 or any of the OS-specific variants. |
5152
/// | Exportable to ONNX | No |
5253
///
53-
/// ### Training Algorithm Details
54-
/// Trains a Deep Neural Network(DNN) by leveraging an existing pre-trained NAS-BERT roBERTa model for the purpose of determining sentence similarity.
54+
/// ### Training algorithm details
55+
/// Trains a Deep Neural Network (DNN) by leveraging an existing, pretrained NAS-BERT roBERTa model for the purpose of determining sentence similarity.
5556
/// ]]>
5657
/// </format>
5758
/// </remarks>

src/Microsoft.ML.TorchSharp/NasBert/TextClassificationTrainer.cs

+11-10
Original file line numberDiff line numberDiff line change
@@ -28,32 +28,33 @@
2828
namespace Microsoft.ML.TorchSharp.NasBert
2929
{
3030
/// <summary>
31-
/// The <see cref="IEstimator{TTransformer}"/> for training a Deep Neural Network(DNN) to classify text.
31+
/// The <see cref="IEstimator{TTransformer}"/> for training a Deep Neural Network (DNN) to classify text.
3232
/// </summary>
3333
/// <remarks>
3434
/// <format type="text/markdown"><![CDATA[
3535
/// To create this trainer, use [TextClassification](xref:Microsoft.ML.TorchSharpCatalog.TextClassification(Microsoft.ML.MulticlassClassificationCatalog.MulticlassClassificationTrainers,Int32,System.String,System.String,System.String,System.String,Int32,Int32,Int32,Microsoft.ML.TorchSharp.NasBert.BertArchitecture,Microsoft.ML.IDataView)).
3636
///
37-
/// ### Input and Output Columns
38-
/// The input label column data must be [key](xref:Microsoft.ML.Data.KeyDataViewType) type and the sentence columns must be of type<xref:Microsoft.ML.Data.TextDataViewType>.
37+
/// ### Input and output columns
38+
/// The input label column data must be [key](xref:Microsoft.ML.Data.KeyDataViewType) type and the sentence columns must be of type <xref:Microsoft.ML.Data.TextDataViewType>.
3939
///
4040
/// This trainer outputs the following columns:
4141
///
42-
/// | Output Column Name | Column Type | Description|
42+
/// | Output column name | Column type | Description|
4343
/// | -- | -- | -- |
44-
/// | `PredictedLabel` | [key](xref:Microsoft.ML.Data.KeyDataViewType) type | The predicted label's index. If its value is i, the actual label would be the i-th category in the key-valued input label type. |
45-
/// | `Score` | Vector of<xref:System.Single> | The scores of all classes.Higher value means higher probability to fall into the associated class. If the i-th element has the largest value, the predicted label index would be i.Note that i is zero-based index. |
46-
/// ### Trainer Characteristics
47-
/// | | |
44+
/// | `PredictedLabel` | [key](xref:Microsoft.ML.Data.KeyDataViewType) type | The predicted label's index. If its value is `i`, the actual label would be the `i`-th category in the key-valued input label type. |
45+
/// | `Score` | Vector of <xref:System.Single> | The scores of all classes. A higher value means a higher probability of falling into the associated class. If the `i`-th element has the largest value, the predicted label index would be `i`. Note that `i` is a zero-based index. |
46+
///
47+
/// ### Trainer characteristics
48+
/// | Characteristic | Value |
4849
/// | -- | -- |
4950
/// | Machine learning task | Multiclass classification |
5051
/// | Is normalization required? | No |
5152
/// | Is caching required? | No |
5253
/// | Required NuGet in addition to Microsoft.ML | Microsoft.ML.TorchSharp and libtorch-cpu or libtorch-cuda-11.3 or any of the OS-specific variants. |
5354
/// | Exportable to ONNX | No |
5455
///
55-
/// ### Training Algorithm Details
56-
/// Trains a Deep Neural Network(DNN) by leveraging an existing pre-trained NAS-BERT roBERTa model for the purpose of classifying text.
56+
/// ### Training algorithm details
57+
/// Trains a Deep Neural Network (DNN) by leveraging an existing, pretrained NAS-BERT roBERTa model for the purpose of classifying text.
5758
/// ]]>
5859
/// </format>
5960
/// </remarks>

0 commit comments

Comments
 (0)