9
9
import pandas as pd
10
10
import yaml
11
11
12
- from . import api , exceptions , utils , validators
12
+ from . import api , exceptions , utils
13
13
from .projects import Project
14
14
from .schemas import BaselineModelSchema , DatasetSchema , ModelSchema
15
15
from .tasks import TaskType
16
+
17
+ # from validators import models as model_validators
18
+ from .validators import (
19
+ commit_validators ,
20
+ dataset_validators ,
21
+ model_validators ,
22
+ project_validators ,
23
+ )
16
24
from .version import __version__ # noqa: F401
17
25
18
26
# Path to the ``.openlayer`` directory inside the current user's home directory.
OPENLAYER_DIR = os .path .join (os .path .expanduser ("~" ), ".openlayer" )
@@ -91,7 +99,9 @@ def create_project(
91
99
"description" : description ,
92
100
"task_type" : task_type ,
93
101
}
94
- project_validator = validators .ProjectValidator (project_config = project_config )
102
+ project_validator = project_validators .ProjectValidator (
103
+ project_config = project_config
104
+ )
95
105
failed_validations = project_validator .validate ()
96
106
97
107
if failed_validations :
@@ -101,7 +111,11 @@ def create_project(
101
111
) from None
102
112
103
113
endpoint = "projects"
104
- payload = dict (name = name , description = description , taskType = task_type .value )
114
+ payload = {
115
+ "name" : name ,
116
+ "description" : description ,
117
+ "taskType" : task_type .value ,
118
+ }
105
119
project_data = self .api .post_request (endpoint , body = payload )
106
120
107
121
project = Project (project_data , self .api .upload , self )
@@ -232,22 +246,29 @@ def add_model(
232
246
233
247
The model configuration YAML file must contain the following fields:
234
248
235
- - `` name`` : str
249
+ name : str
236
250
Name of the model.
237
- - `` architectureType`` : str
251
+ architectureType : str
238
252
The model's framework. Must be one of the supported frameworks
239
253
on :obj:`ModelType`.
240
- - `` classNames`` : List[str]
254
+ classNames : List[str]
241
255
List of class names corresponding to the outputs of your predict function.
242
256
E.g. ``['positive', 'negative']``.
243
- - `` featureNames`` : List[str], default []
257
+ featureNames : List[str], default []
244
258
List of input feature names. Only applicable if your ``task_type`` is
245
259
:obj:`TaskType.TabularClassification` or :obj:`TaskType.TabularRegression`.
246
- - `` categoricalFeatureNames`` : List[str], default []
260
+ categoricalFeatureNames : List[str], default []
247
261
A list containing the names of all categorical features used by the model.
248
262
E.g. ``["Gender", "Geography"]``. Only applicable if your ``task_type`` is
249
263
:obj:`TaskType.TabularClassification` or :obj:`TaskType.TabularRegression`.
250
- - ``metadata`` : Dict[str, any], default {}
264
+ predictionThreshold : float, default None
265
+ The threshold used to determine the predicted class. Only applicable if you
266
+ are using a binary classifier and you provided the ``predictionScoresColumnName``
267
+ with the lists of class probabilities in your datasets (refer to :obj:`add_dataframe`).
268
+
269
+ If you provided ``predictionScoresColumnName`` but not ``predictionThreshold``,
270
+ the predicted class is defined by the argmax of the lists in ``predictionScoresColumnName``.
271
+ metadata : Dict[str, Any], default {}
251
272
Dictionary containing metadata about the model. This is the metadata that
252
273
will be displayed on the Openlayer platform.
253
274
@@ -263,8 +284,8 @@ def add_model(
263
284
- ``prediction_interface.py``
264
285
The prediction interface file.
265
286
- ``model artifacts``
266
- The model artifacts. This can be a single file or a directory containing
267
- multiple files. The model artifacts must be compatible with the
287
+ The model artifacts. This can be a single file, multiple files or a directory.
288
+ The model artifacts must be compatible with the
268
289
prediction interface file.
269
290
- ``requirements.txt``
270
291
The requirements file. This file contains the dependencies needed to run
@@ -389,7 +410,7 @@ def add_model(
389
410
)
390
411
391
412
# Validate model package
392
- model_package_validator = validators .ModelValidator (
413
+ model_package_validator = model_validators .ModelValidator (
393
414
model_package_dir = model_package_dir ,
394
415
model_config_file_path = model_config_file_path ,
395
416
sample_data = sample_data ,
@@ -467,7 +488,8 @@ def add_baseline_model(
467
488
)
468
489
469
490
# Validate the baseline model
470
- baseline_model_validator = validators .BaselineModelValidator (
491
+
492
+ baseline_model_validator = model_validators .BaselineModelValidator (
471
493
model_config_file_path = model_config_file_path ,
472
494
)
473
495
failed_validations = baseline_model_validator .validate ()
@@ -536,15 +558,27 @@ def add_dataset(
536
558
Column header in the csv containing the input text. Only applicable if
537
559
your ``task_type`` is :obj:`TaskType.TextClassification`.
538
560
predictionsColumnName : str, default None
539
- Column header in the csv containing the predictions. Only applicable if you
540
- are uploading a model as well with the :obj:`add_model` method.
561
+ Column header in the csv containing the model's predictions as **zero-indexed
562
+ integers**. Only applicable if you are uploading a model as well with the
563
+ :obj:`add_model` method.
564
+
565
+ This is optional if you provide a ``predictionScoresColumnName``.
566
+
567
+ .. important::
568
+ The values in this column must be zero-indexed integer values.
569
+ predictionScoresColumnName : str, default None
570
+ Column header in the csv containing the model's predictions as **lists of
571
+ class probabilities**. Only applicable if you are uploading a model as well with
572
+ the :obj:`add_model` method.
573
+
574
+ This is optional if you provide a ``predictionsColumnName``.
541
575
542
576
.. important::
543
577
Each cell in this column must contain a list of
544
578
class probabilities. For example, for a binary classification
545
579
task, the column with the prediction scores should look like this:
546
580
547
- **predictions **
581
+ **prediction_scores **
548
582
549
583
``[0.1, 0.9]``
550
584
@@ -684,7 +718,7 @@ class probabilities. For example, for a binary classification
684
718
>>> project.push()
685
719
"""
686
720
# Validate dataset
687
- dataset_validator = validators .DatasetValidator (
721
+ dataset_validator = dataset_validators .DatasetValidator (
688
722
dataset_config_file_path = dataset_config_file_path ,
689
723
dataset_file_path = file_path ,
690
724
)
@@ -752,15 +786,27 @@ def add_dataframe(
752
786
Column header in the dataframe containing the input text. Only applicable if
753
787
your ``task_type`` is :obj:`TaskType.TextClassification`.
754
788
predictionsColumnName : str, default None
755
- Column header in the dataframe containing the predictions. Only applicable if you
756
- are uploading a model as well with the :obj:`add_model` method.
789
+ Column header in the dataframe containing the model's predictions as **zero-indexed
790
+ integers**. Only applicable if you are uploading a model as well with the
791
+ :obj:`add_model` method.
792
+
793
+ This is optional if you provide a ``predictionScoresColumnName``.
794
+
795
+ .. important::
796
+ The values in this column must be zero-indexed integer values.
797
+ predictionScoresColumnName : str, default None
798
+ Column header in the dataframe containing the model's predictions as **lists of
799
+ class probabilities**. Only applicable if you are uploading a model as well with
800
+ the :obj:`add_model` method.
801
+
802
+ This is optional if you provide a ``predictionsColumnName``.
757
803
758
804
.. important::
759
805
Each cell in this column must contain a list of
760
806
class probabilities. For example, for a binary classification
761
807
task, the column with the prediction scores should look like this:
762
808
763
- **predictions **
809
+ **prediction_scores **
764
810
765
811
``[0.1, 0.9]``
766
812
@@ -950,7 +996,7 @@ def commit(self, message: str, project_id: int, force: bool = False):
950
996
>>> project.push()
951
997
"""
952
998
# Validate commit
953
- commit_validator = validators .CommitValidator (commit_message = message )
999
+ commit_validator = commit_validators .CommitValidator (commit_message = message )
954
1000
failed_validations = commit_validator .validate ()
955
1001
956
1002
if failed_validations :
@@ -1039,7 +1085,7 @@ def push(self, project_id: int):
1039
1085
commit = yaml .safe_load (commit_file )
1040
1086
1041
1087
# Validate bundle resources
1042
- commit_bundle_validator = validators .CommitBundleValidator (
1088
+ commit_bundle_validator = commit_validators .CommitBundleValidator (
1043
1089
bundle_path = project_dir ,
1044
1090
skip_dataset_validation = True ,
1045
1091
skip_model_validation = False , # Don't skip because the sample data is different
0 commit comments