Skip to content

Commit b15963e

Browse files
committed
API documentation
1 parent 3cb7446 commit b15963e

22 files changed

+778
-199
lines changed

autoPyTorch/components/ensembles/abstract_ensemble.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33

44
class AbstractEnsemble(object):
5+
"""Ensemble interface extracted from auto-sklearn"""
6+
57
__metaclass__ = ABCMeta
68

79
@abstractmethod

autoPyTorch/components/ensembles/ensemble_selection.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77

88

99
class EnsembleSelection(AbstractEnsemble):
10+
"""Ensemble Selection algorithm extracted from auto-sklearn"""
11+
1012
def __init__(self, ensemble_size, metric,
1113
sorted_initialization_n_best=0, only_consider_n_best=0,
1214
bagging=False, mode='fast'):

autoPyTorch/components/lr_scheduler/lr_schedulers.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,18 @@
1717
__license__ = "BSD"
1818

1919
class AutoNetLearningRateSchedulerBase(object):
20-
def __new__(cls, params, config):
21-
scheduler = cls._get_scheduler(cls, params, config)
20+
def __new__(cls, optimizer, config):
21+
"""Get a new instance of the scheduler
22+
23+
Arguments:
24+
cls {class} -- Type of scheduler
25+
optimizer {Optimizer} -- A PyTorch Optimizer
26+
config {dict} -- Sampled lr_scheduler config
27+
28+
Returns:
29+
AutoNetLearningRateSchedulerBase -- The learning rate scheduler object
30+
"""
31+
scheduler = cls._get_scheduler(cls, optimizer, config)
2232
if not hasattr(scheduler, "allows_early_stopping"):
2333
scheduler.allows_early_stopping = True
2434
if not hasattr(scheduler, "snapshot_before_restart"):

autoPyTorch/components/metrics/additional_logs.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11

22
class test_result():
3+
"""Log the performance on the test set"""
34
def __init__(self, autonet, X_test, Y_test):
45
self.autonet = autonet
56
self.X_test = X_test

autoPyTorch/components/metrics/balanced_accuracy.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66

77
def balanced_accuracy(solution, prediction):
8+
"""Balanced accuracy implementation taken from auto-sklearn"""
89

910
y_type, solution, prediction = _check_targets(solution, prediction)
1011

autoPyTorch/core/autonet_classes/autonet_feature_classification.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
class AutoNetClassification(AutoNetFeatureData):
44
preset_folder_name = "feature_classification"
55

6+
# OVERRIDE
67
@staticmethod
78
def _apply_default_pipeline_settings(pipeline):
89
from autoPyTorch.pipeline.nodes.network_selector import NetworkSelector

autoPyTorch/core/autonet_classes/autonet_feature_data.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@ class AutoNetFeatureData(AutoNet):
99

1010
@classmethod
1111
def get_default_ensemble_pipeline(cls):
12+
"""Construct a default pipeline that includes the nodes for ensembles.
13+
14+
Returns:
15+
Pipeline -- The constructed default pipeline
16+
"""
1217
from autoPyTorch.pipeline.base.pipeline import Pipeline
1318
from autoPyTorch.pipeline.nodes import AutoNetSettings, OptimizationAlgorithm, \
1419
CrossValidation, Imputation, NormalizationStrategySelector, OneHotEncoding, PreprocessorSelector, ResamplingStrategySelector, \
@@ -50,6 +55,11 @@ def get_default_ensemble_pipeline(cls):
5055

5156
@classmethod
5257
def get_default_pipeline(cls):
58+
"""Construct a default pipeline that does not include the nodes for ensembles.
59+
60+
Returns:
61+
Pipeline -- The constructed default pipeline
62+
"""
5363
from autoPyTorch.pipeline.base.pipeline import Pipeline
5464
from autoPyTorch.pipeline.nodes import AutoNetSettings, OptimizationAlgorithm, \
5565
CrossValidation, Imputation, NormalizationStrategySelector, OneHotEncoding, PreprocessorSelector, ResamplingStrategySelector, \
@@ -87,6 +97,11 @@ def get_default_pipeline(cls):
8797

8898
@staticmethod
8999
def _apply_default_pipeline_settings(pipeline):
100+
"""Add the components to the pipeline
101+
102+
Arguments:
103+
pipeline {Pipeline} -- The pipeline to add the components to
104+
"""
90105
from autoPyTorch.pipeline.nodes import NormalizationStrategySelector, PreprocessorSelector, EmbeddingSelector, NetworkSelector, \
91106
OptimizerSelector, LearningrateSchedulerSelector, TrainNode, CrossValidation, InitializationSelector
92107

@@ -150,4 +165,4 @@ def _apply_default_pipeline_settings(pipeline):
150165
train_node.add_batch_loss_computation_technique("mixup", Mixup)
151166

152167
cv = pipeline[CrossValidation.get_name()]
153-
cv.add_cross_validator("k_fold", KFold)
168+
cv.add_cross_validator("k_fold", KFold)

autoPyTorch/core/autonet_classes/autonet_feature_multilabel.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
class AutoNetMultilabel(AutoNetFeatureData):
44
preset_folder_name = "feature_multilabel"
55

6+
# OVERRIDE
67
@staticmethod
78
def _apply_default_pipeline_settings(pipeline):
89
from autoPyTorch.pipeline.nodes.network_selector import NetworkSelector

autoPyTorch/core/autonet_classes/autonet_feature_regression.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
class AutoNetRegression(AutoNetFeatureData):
99
preset_folder_name = "feature_regression"
1010

11+
# OVERRIDE
1112
@staticmethod
1213
def _apply_default_pipeline_settings(pipeline):
1314
from autoPyTorch.pipeline.nodes.network_selector import NetworkSelector

autoPyTorch/data_management/data_manager.py

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,14 @@ class ProblemType(Enum):
2121
FeatureMultilabel = 4
2222

2323
class DataManager(object):
24+
""" Load data from multiple sources and formats"""
25+
2426
def __init__(self, verbose=0):
27+
"""Construct the DataManager
28+
29+
Keyword Arguments:
30+
verbose {int} -- Whether to print status output. (default: {0})
31+
"""
2532
self.verbose = verbose
2633
self.X_train, self.Y_train = None, None
2734
self.X_test, self.Y_test = None, None
@@ -33,6 +40,16 @@ def __init__(self, verbose=0):
3340
self.categorical_features = None
3441

3542
def read_data(self, file_name, test_split=0.0, is_classification=None, random_seed=0, **kwargs):
43+
"""Read the data.
44+
45+
Arguments:
46+
file_name {str} -- The name of the file to load. The reader is chosen from the name itself (e.g. a ".csv" suffix or an "openml:" prefix).
47+
48+
Keyword Arguments:
49+
test_split {float} -- Amount of data to use as test split (default: {0.0})
50+
is_classification {bool} -- Whether the data is a classification task (default: {None})
51+
random_seed {int} -- a random seed (default: {0})
52+
"""
3653
print("Read:" + file_name)
3754
reader = self._get_reader(file_name, is_classification)
3855
reader.read()
@@ -53,6 +70,18 @@ def read_data(self, file_name, test_split=0.0, is_classification=None, random_se
5370
self._split_data(test_split, random_seed)
5471

5572
def _get_reader(self, file_name, is_classification):
73+
"""Get the reader associated with the filename.
74+
75+
Arguments:
76+
file_name {str} -- The file to load
77+
is_classification {bool} -- Whether the data is a classification task or not
78+
79+
Raises:
80+
ValueError: The given file type is not supported
81+
82+
Returns:
83+
DataReader -- A reader that is able to read the data type
84+
"""
5685
if file_name.endswith(".csv"):
5786
reader = CSVReader(file_name, is_classification=is_classification)
5887
elif file_name.startswith("openml:"):
@@ -65,6 +94,17 @@ def _get_reader(self, file_name, is_classification):
6594
return reader
6695

6796
def generate_classification(self, num_classes, num_features, num_samples, test_split=0.1, seed=0):
97+
"""Generate a classification task
98+
99+
Arguments:
100+
num_classes {int} -- Number of classes
101+
num_features {int} -- Number of features
102+
num_samples {int} -- Number of samples
103+
104+
Keyword Arguments:
105+
test_split {float} -- Size of test split (default: {0.1})
106+
seed {int} -- A random seed (default: {0})
107+
"""
68108
#X, Y = make_classification(n_samples=800, n_features=num_feats, n_classes=num_classes, n_informative=4)
69109
X, y = make_multilabel_classification(
70110
n_samples=num_samples, n_features=num_features, n_classes=num_classes, n_labels=0.01,
@@ -78,13 +118,29 @@ def generate_classification(self, num_classes, num_features, num_samples, test_s
78118
self._split_data(test_split, seed)
79119

80120
def generate_regression(self, num_features, num_samples, test_split=0.1, seed=0):
121+
"""Generate a regression task
122+
123+
Arguments:
124+
num_features {int} -- Number of features
125+
num_samples {int} -- Number of samples
126+
127+
Keyword Arguments:
128+
test_split {float} -- Size of test split (default: {0.1})
129+
seed {int} -- a random seed (default: {0})
130+
"""
81131
X, Y = make_regression(n_samples=num_samples, n_features=num_features, random_state=seed)
82132
self.categorical_features = [False] * num_features
83133
self.problem_type = ProblemType.FeatureRegression
84134
self.X, self.Y = X, Y
85135
self._split_data(test_split, seed)
86136

87137
def _split_data(self, test_split, seed):
138+
"""Split the data into test (, valid) and training sets.
139+
140+
Arguments:
141+
test_split {float} -- Size of the test split
142+
seed {int} -- A random seed
143+
"""
88144
valid_specified = self.X_valid is not None and self.Y_valid is not None
89145
test_specified = self.X_test is not None and self.Y_test is not None
90146

@@ -101,6 +157,17 @@ def _split_data(self, test_split, seed):
101157
self.Y_train = self.Y
102158

103159
def deterministic_shuffle_and_split(X, Y, split, seed):
160+
"""Split the data deterministically given the seed
161+
162+
Arguments:
163+
X {array} -- The feature data
164+
Y {array} -- The targets
165+
split {float} -- The size of the split
166+
seed {int} -- A random seed
167+
168+
Returns:
169+
tuple -- Tuple of full data and the two splits
170+
"""
104171
rng = np.random.RandomState(seed)
105172
p = rng.permutation(X.shape[0])
106173

@@ -110,4 +177,4 @@ def deterministic_shuffle_and_split(X, Y, split, seed):
110177
split = int(split * X.shape[0])
111178
return X, Y, X[0:-split], Y[0:-split], X[-split:], Y[-split:]
112179
else:
113-
return X, Y, X, Y, None, None
180+
return X, Y, X, Y, None, None

0 commit comments

Comments
 (0)