Skip to content

Commit b15963e

Browse files
committed
API documentation
1 parent 3cb7446 commit b15963e

22 files changed

+778
-199
lines changed

autoPyTorch/components/ensembles/abstract_ensemble.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33

44
class AbstractEnsemble(object):
5+
"""Ensemble interface extracted from auto-sklearn"""
6+
57
__metaclass__ = ABCMeta
68

79
@abstractmethod

autoPyTorch/components/ensembles/ensemble_selection.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77

88

99
class EnsembleSelection(AbstractEnsemble):
10+
"""Ensemble Selection algorithm extracted from auto-sklearn"""
11+
1012
def __init__(self, ensemble_size, metric,
1113
sorted_initialization_n_best=0, only_consider_n_best=0,
1214
bagging=False, mode='fast'):

autoPyTorch/components/lr_scheduler/lr_schedulers.py

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,18 @@
1717
__license__ = "BSD"
1818

1919
class AutoNetLearningRateSchedulerBase(object):
20-
def __new__(cls, params, config):
21-
scheduler = cls._get_scheduler(cls, params, config)
20+
def __new__(cls, optimizer, config):
21+
"""Get a new instance of the scheduler
22+
23+
Arguments:
24+
cls {class} -- Type of scheduler
25+
optimizer {Optimizer} -- A PyTorch Optimizer
26+
config {dict} -- Sampled lr_scheduler config
27+
28+
Returns:
29+
AutoNetLearningRateSchedulerBase -- The learning rate scheduler object
30+
"""
31+
scheduler = cls._get_scheduler(cls, optimizer, config)
2232
if not hasattr(scheduler, "allows_early_stopping"):
2333
scheduler.allows_early_stopping = True
2434
if not hasattr(scheduler, "snapshot_before_restart"):

autoPyTorch/components/metrics/additional_logs.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11

22
class test_result():
3+
"""Log the performance on the test set"""
34
def __init__(self, autonet, X_test, Y_test):
45
self.autonet = autonet
56
self.X_test = X_test

autoPyTorch/components/metrics/balanced_accuracy.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55

66

77
def balanced_accuracy(solution, prediction):
8+
"""balanced accuracy implementation of auto-sklearn"""
89

910
y_type, solution, prediction = _check_targets(solution, prediction)
1011

autoPyTorch/core/autonet_classes/autonet_feature_classification.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
class AutoNetClassification(AutoNetFeatureData):
44
preset_folder_name = "feature_classification"
55

6+
# OVERRIDE
67
@staticmethod
78
def _apply_default_pipeline_settings(pipeline):
89
from autoPyTorch.pipeline.nodes.network_selector import NetworkSelector

autoPyTorch/core/autonet_classes/autonet_feature_data.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@ class AutoNetFeatureData(AutoNet):
99

1010
@classmethod
1111
def get_default_ensemble_pipeline(cls):
12+
"""Construct a default pipeline, include nodes for Ensemble.
13+
14+
Returns:
15+
Pipeline -- The constructed default pipeline
16+
"""
1217
from autoPyTorch.pipeline.base.pipeline import Pipeline
1318
from autoPyTorch.pipeline.nodes import AutoNetSettings, OptimizationAlgorithm, \
1419
CrossValidation, Imputation, NormalizationStrategySelector, OneHotEncoding, PreprocessorSelector, ResamplingStrategySelector, \
@@ -50,6 +55,11 @@ def get_default_ensemble_pipeline(cls):
5055

5156
@classmethod
5257
def get_default_pipeline(cls):
58+
"""Construct a default pipeline, do not include nodes for Ensemble.
59+
60+
Returns:
61+
Pipeline -- The constructed default pipeline
62+
"""
5363
from autoPyTorch.pipeline.base.pipeline import Pipeline
5464
from autoPyTorch.pipeline.nodes import AutoNetSettings, OptimizationAlgorithm, \
5565
CrossValidation, Imputation, NormalizationStrategySelector, OneHotEncoding, PreprocessorSelector, ResamplingStrategySelector, \
@@ -87,6 +97,11 @@ def get_default_pipeline(cls):
8797

8898
@staticmethod
8999
def _apply_default_pipeline_settings(pipeline):
100+
"""Add the components to the pipeline
101+
102+
Arguments:
103+
pipeline {Pipeline} -- The pipeline to add the components to
104+
"""
90105
from autoPyTorch.pipeline.nodes import NormalizationStrategySelector, PreprocessorSelector, EmbeddingSelector, NetworkSelector, \
91106
OptimizerSelector, LearningrateSchedulerSelector, TrainNode, CrossValidation, InitializationSelector
92107

@@ -150,4 +165,4 @@ def _apply_default_pipeline_settings(pipeline):
150165
train_node.add_batch_loss_computation_technique("mixup", Mixup)
151166

152167
cv = pipeline[CrossValidation.get_name()]
153-
cv.add_cross_validator("k_fold", KFold)
168+
cv.add_cross_validator("k_fold", KFold)

autoPyTorch/core/autonet_classes/autonet_feature_multilabel.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
class AutoNetMultilabel(AutoNetFeatureData):
44
preset_folder_name = "feature_multilabel"
55

6+
# OVERRIDE
67
@staticmethod
78
def _apply_default_pipeline_settings(pipeline):
89
from autoPyTorch.pipeline.nodes.network_selector import NetworkSelector

autoPyTorch/core/autonet_classes/autonet_feature_regression.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
class AutoNetRegression(AutoNetFeatureData):
99
preset_folder_name = "feature_regression"
1010

11+
# OVERRIDE
1112
@staticmethod
1213
def _apply_default_pipeline_settings(pipeline):
1314
from autoPyTorch.pipeline.nodes.network_selector import NetworkSelector

autoPyTorch/data_management/data_manager.py

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,14 @@ class ProblemType(Enum):
2121
FeatureMultilabel = 4
2222

2323
class DataManager(object):
24+
"""Load data from multiple sources and formats"""
25+
2426
def __init__(self, verbose=0):
27+
"""Construct the DataManager
28+
29+
Keyword Arguments:
30+
verbose {bool} -- Whether to print stuff. (default: {0})
31+
"""
2532
self.verbose = verbose
2633
self.X_train, self.Y_train = None, None
2734
self.X_test, self.Y_test = None, None
@@ -33,6 +40,16 @@ def __init__(self, verbose=0):
3340
self.categorical_features = None
3441

3542
def read_data(self, file_name, test_split=0.0, is_classification=None, random_seed=0, **kwargs):
43+
"""Read the data.
44+
45+
Arguments:
46+
file_name {str} -- The name of the file to load. Different Readers are associated with different filenames.
47+
48+
Keyword Arguments:
49+
test_split {float} -- Amount of data to use as test split (default: {0.0})
50+
is_classification {bool} -- Whether the data is a classification task (default: {None})
51+
random_seed {int} -- a random seed (default: {0})
52+
"""
3653
print("Read:" + file_name)
3754
reader = self._get_reader(file_name, is_classification)
3855
reader.read()
@@ -53,6 +70,18 @@ def read_data(self, file_name, test_split=0.0, is_classification=None, random_se
5370
self._split_data(test_split, random_seed)
5471

5572
def _get_reader(self, file_name, is_classification):
73+
"""Get the reader associated with the filename.
74+
75+
Arguments:
76+
file_name {str} -- The file to load
77+
is_classification {bool} -- Whether the data is a classification task or not
78+
79+
Raises:
80+
ValueError: The given file type is not supported
81+
82+
Returns:
83+
DataReader -- A reader that is able to read the data type
84+
"""
5685
if file_name.endswith(".csv"):
5786
reader = CSVReader(file_name, is_classification=is_classification)
5887
elif file_name.startswith("openml:"):
@@ -65,6 +94,17 @@ def _get_reader(self, file_name, is_classification):
6594
return reader
6695

6796
def generate_classification(self, num_classes, num_features, num_samples, test_split=0.1, seed=0):
97+
"""Generate a classification task
98+
99+
Arguments:
100+
num_classes {int} -- Number of classes
101+
num_features {int} -- Number of features
102+
num_samples {int} -- Number of samples
103+
104+
Keyword Arguments:
105+
test_split {float} -- Size of test split (default: {0.1})
106+
seed {int} -- A random seed (default: {0})
107+
"""
68108
#X, Y = make_classification(n_samples=800, n_features=num_feats, n_classes=num_classes, n_informative=4)
69109
X, y = make_multilabel_classification(
70110
n_samples=num_samples, n_features=num_features, n_classes=num_classes, n_labels=0.01,
@@ -78,13 +118,29 @@ def generate_classification(self, num_classes, num_features, num_samples, test_s
78118
self._split_data(test_split, seed)
79119

80120
def generate_regression(self, num_features, num_samples, test_split=0.1, seed=0):
121+
"""Generate a regression task
122+
123+
Arguments:
124+
num_features {int} -- Number of features
125+
num_samples {int} -- Number of samples
126+
127+
Keyword Arguments:
128+
test_split {float} -- Size of test split (default: {0.1})
129+
seed {int} -- a random seed (default: {0})
130+
"""
81131
X, Y = make_regression(n_samples=num_samples, n_features=num_features, random_state=seed)
82132
self.categorical_features = [False] * num_features
83133
self.problem_type = ProblemType.FeatureRegression
84134
self.X, self.Y = X, Y
85135
self._split_data(test_split, seed)
86136

87137
def _split_data(self, test_split, seed):
138+
"""Split the data into test (, valid) and training sets.
139+
140+
Arguments:
141+
test_split {float} -- Size of test split
142+
seed {int} -- A random seed
143+
"""
88144
valid_specified = self.X_valid is not None and self.Y_valid is not None
89145
test_specified = self.X_test is not None and self.Y_test is not None
90146

@@ -101,6 +157,17 @@ def _split_data(self, test_split, seed):
101157
self.Y_train = self.Y
102158

103159
def deterministic_shuffle_and_split(X, Y, split, seed):
160+
"""Split the data deterministically given the seed
161+
162+
Arguments:
163+
X {array} -- The feature data
164+
Y {array} -- The targets
165+
split {float} -- The size of the split
166+
seed {int} -- A random seed
167+
168+
Returns:
169+
tuple -- Tuple of full data and the two splits
170+
"""
104171
rng = np.random.RandomState(seed)
105172
p = rng.permutation(X.shape[0])
106173

@@ -110,4 +177,4 @@ def deterministic_shuffle_and_split(X, Y, split, seed):
110177
split = int(split * X.shape[0])
111178
return X, Y, X[0:-split], Y[0:-split], X[-split:], Y[-split:]
112179
else:
113-
return X, Y, X, Y, None, None
180+
return X, Y, X, Y, None, None

autoPyTorch/pipeline/base/node.py

Lines changed: 51 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,41 @@ def __init__(self):
1414
self.predict_output = None
1515

1616
def fit(self, **kwargs):
17+
"""Fit pipeline node.
18+
Each node computes its fit function in linear order by fit_traverse().
19+
All args have to be specified in a parent node fit output.
20+
21+
Returns:
22+
dict -- output values that will be passed to child nodes, if required
23+
"""
1724
return dict()
1825

1926
def predict(self, **kwargs):
27+
"""Predict pipeline node.
28+
Each node computes its predict function in linear order by predict_traverse().
29+
All args have to be specified in a parent node predict output or in the fit output of this node
30+
31+
Returns:
32+
dict -- output values that will be passed to child nodes, if required
33+
"""
2034
return dict()
2135

2236
def get_fit_argspec(self):
37+
"""Get the necessary keywords of the fit method for this node
38+
39+
Returns:
40+
tuple -- The keywords and their defaults
41+
"""
2342
possible_keywords, _, _, defaults, _, _, _ = inspect.getfullargspec(self.fit)
2443
possible_keywords = [k for k in possible_keywords if k != 'self']
2544
return possible_keywords, defaults
2645

2746
def get_predict_argspec(self):
47+
"""Get the necessary keywords of the predict method for this node
48+
49+
Returns:
50+
tuple -- The keywords and their defaults
51+
"""
2852
possible_keywords, _, _, defaults, _, _, _ = inspect.getfullargspec(self.predict)
2953
possible_keywords = [k for k in possible_keywords if k != 'self']
3054
return possible_keywords, defaults
@@ -39,10 +63,15 @@ def clean_fit_data(self):
3963
node = node.child_node
4064

4165
def fit_traverse(self, **kwargs):
42-
"""Calls fit function of child nodes.
66+
"""
67+
Calls fit function of child nodes.
4368
The fit function can have different keyword arguments.
4469
All keywords have to be either defined in kwargs or in a fit output of a parent node.
45-
70+
71+
The fit method of each node specifies a list of keyword arguments.
72+
The fit method of each node returns a dictionary of values for keywords of following nodes.
73+
74+
This method collects the results of each fit method call and calls the fit methods with the collected values.
4675
"""
4776

4877
self.clean_fit_data()
@@ -51,32 +80,38 @@ def fit_traverse(self, **kwargs):
5180
base = Node()
5281
base.fit_output = kwargs
5382

83+
# map each collected kwarg to the node whose output provided it
5484
available_kwargs = {key: base for key in kwargs.keys()}
5585

5686
node = self
5787
prev_node = base
5888

5989
while (node is not None):
6090
prev_node = node
91+
# get necessary kwargs of current node
6192
possible_keywords, defaults = node.get_fit_argspec()
6293

6394
last_required_keyword_index = len(possible_keywords) - len(defaults or [])
6495
required_kwargs = dict()
96+
97+
# get the values to the necessary keywords if available. Use default if not.
6598
for index, keyword in enumerate(possible_keywords):
6699
if (keyword in available_kwargs):
67100
required_kwargs[keyword] = available_kwargs[keyword].fit_output[keyword]
68101

69102
elif index >= last_required_keyword_index:
70103
required_kwargs[keyword] = defaults[index - last_required_keyword_index]
71104

72-
else:
105+
else: # Neither default specified nor keyword available
73106
print ("Available keywords:", sorted(available_kwargs.keys()))
74107
raise ValueError('Node ' + str(type(node)) + ' requires keyword ' + str(keyword) + ' which is not available.')
75108

109+
# call fit method
76110
node.fit_output = node.fit(**required_kwargs)
77111
if (not isinstance(node.fit_output, dict)):
78112
raise ValueError('Node ' + str(type(node)) + ' does not return a dictionary.')
79113

114+
# collect resulting keyword-value pairs
80115
for keyword in node.fit_output.keys():
81116
if keyword in available_kwargs:
82117
# delete old values
@@ -92,13 +127,20 @@ def fit_traverse(self, **kwargs):
92127
def predict_traverse(self, **kwargs):
93128
"""Calls predict function of child nodes.
94129
The predict function can have different keyword arguments.
95-
All keywords have to be either defined in kwargs, in a predict output of a parent node or in the nodes own fit output
130+
All keywords have to be either defined in kwargs, in a predict output of a parent node or in the node's own fit output.
131+
132+
The predict method of each node specifies a list of keyword arguments.
133+
The predict method of each node returns a dictionary of values for keywords of following nodes.
134+
135+
This method collects the results of each predict method call and calls the predict methods with the collected values.
136+
For each node, the results of the fit call can also be passed to the predict method
96137
97138
"""
98139

99140
base = Node()
100141
base.predict_output = kwargs
101142

143+
# map each collected kwarg to the node whose output provided it
102144
available_kwargs = {key: base for key in kwargs.keys()}
103145

104146
node = self
@@ -115,10 +157,13 @@ def predict_traverse(self, **kwargs):
115157

116158
while (node is not None):
117159
prev_node = node
160+
# get necessary kwargs of current node
118161
possible_keywords, defaults = node.get_predict_argspec()
119162

120163
last_required_keyword_index = len(possible_keywords) - len(defaults or [])
121164
required_kwargs = dict()
165+
166+
# get the values to the necessary keywords if available. Use fit result or default if not.
122167
for index, keyword in enumerate(possible_keywords):
123168
if (keyword in available_kwargs):
124169
if (available_kwargs[keyword].predict_output is None):
@@ -131,13 +176,14 @@ def predict_traverse(self, **kwargs):
131176
elif index >= last_required_keyword_index:
132177
required_kwargs[keyword] = defaults[index - last_required_keyword_index]
133178

134-
else:
179+
else: # Neither default specified nor keyword available nor available in fit result of the node
135180
raise ValueError('Node ' + str(type(node)) + ' requires keyword ' + keyword + ' which is not available.')
136181

137182
node.predict_output = node.predict(**required_kwargs)
138183
if (not isinstance(node.predict_output, dict)):
139184
raise ValueError('Node ' + str(type(node)) + ' does not return a dictionary.')
140185

186+
# collect keyword arguments
141187
for keyword in node.predict_output.keys():
142188
if keyword in available_kwargs:
143189
# delete old values

0 commit comments

Comments
 (0)