23
23
# ' *Note*: All Benchmark Suites on OpenML are also collections.
24
24
# '
25
25
# ' @section Caching:
26
- # ' The OpenML collection itself cannot be not cached, this is because it can be modified in-place
27
- # ' on the server, e.g. by adding or removing tasks or runs.
28
- # ' The construction argument `cache` therefore only controls wether caching is applied to the
29
- # ' OpenML objects that are contained in the collection.
26
+ # ' Because collections on OpenML can be modified (ids can be added), it is not possible to cache
27
+ # ' this object.
30
28
# '
31
29
# ' @section mlr3 Integration:
32
30
# ' * Obtain a list of [mlr3::Task]s using [mlr3::as_tasks].
37
35
# ' @references
38
36
# ' `r format_bib("vanschoren2014")`
39
37
# ' @export
40
- # ' @examples
41
- # ' try({
42
- # ' library("mlr3")
43
- # ' # OpenML Run collection:
44
- # ' run_collection = OMLCollection$new(id = 232)
45
- # ' # using sugar
46
- # ' run_collection = ocl(id = 232)
47
- # ' print(run_collection)
48
- # '
49
- # ' # OpenML task collection:
50
- # ' task_collection = OMLCollection$new(id = 258)
51
- # ' # using sugar
52
- # ' task_collection = ocl(id = 258)
53
- # ' print(task_collection)
54
- # ' }, silent = TRUE)
38
+ # ' @template examples
55
39
OMLCollection = R6Class(" OMLCollection" ,
56
40
inherit = OMLObject ,
57
41
public = list (
58
42
# ' @description
59
43
# ' Creates a new instance of this [R6][R6::R6Class] class.
60
44
# '
61
45
# ' @template param_id
62
- # ' @param cache (`logical(1)` | `character(1)`)\cr
63
- # ' See field `cache` for an explanation of possible values.
64
- # ' Defaults to value of option `"mlr3oml.cache"`, or `FALSE` if not set.
65
- # ' The collection itself is not cached, this is because it can be modified in-place on OpenML,
66
- # ' e.g. by adding or removing tasks or runs. This parameter therefore only controls whether
67
- # ' the contained elements are cached when loaded, e.g. when accessing the included tasks.
68
- # ' @template param_parquet
69
46
# ' @template param_test_server
70
47
initialize = function (
71
48
id ,
72
- cache = cache_default(),
73
- parquet = parquet_default(),
74
49
test_server = test_server_default()
75
50
) {
76
- private $ .parquet = assert_flag(parquet )
77
- super $ initialize(id , cache , test_server , " collection" )
51
+ super $ initialize(id , test_server , " collection" )
78
52
},
79
53
# ' @description
80
54
# ' Prints the object.
81
55
print = function () {
56
+ # trigger download first for better printing
57
+ self $ desc
82
58
catf(" <OMLCollection: %i> %s" , self $ id , as_short_string(self $ name ))
83
59
catf(" * data: %i" , length(self $ data_ids ))
84
60
catf(" * tasks: %i" , length(self $ task_ids ))
@@ -89,6 +65,12 @@ OMLCollection = R6Class("OMLCollection",
89
65
if (self $ test_server ) {
90
66
catf(" * Using test server" )
91
67
}
68
+ },
69
+ # ' @description
70
+ # ' Downloads the whole object for offline usage.
71
+ download = function () {
72
+ self $ desc
73
+ invisible (self )
92
74
}
93
75
),
94
76
active = list (
@@ -124,72 +106,7 @@ OMLCollection = R6Class("OMLCollection",
124
106
run_ids = function () self $ desc $ runs $ run_id ,
125
107
# ' @field task_ids (`integer(n)`)\cr
126
108
# ' A vector containing the task ids of the collection.
127
- task_ids = function () self $ desc $ task $ task_id ,
128
- # ' @field runs (`data.table()`)
129
- # ' A data.table summarizing the runs included in the collection. Returns NULL for
130
- # ' Task Collections.
131
- runs = function () {
132
- if (self $ main_entity_type == " task" ) {
133
- messagef(" Main entity type is task, returning NULL." )
134
- return (NULL )
135
- }
136
- if (is.null(private $ .runs )) {
137
- runs = map(
138
- self $ run_ids ,
139
- function (x ) OMLRun $ new(x , cache = self $ cache_dir , parquet = self $ parquet ,
140
- test_server = self $ test_server
141
- )
142
- )
143
-
144
- private $ .runs = make_run_table(runs )
145
- }
146
- return (private $ .runs )
147
- },
148
- # ' @field flows (`data.table()`)
149
- # ' A data.table summarizing the flows included in the collection. Returns `NULL` for
150
- # ' Task Collections.
151
- flows = function () {
152
- if (self $ main_entity_type == " task" ) {
153
- messagef(" Main entity type is task, returning NULL." )
154
- return (NULL )
155
- }
156
- if (is.null(private $ .flows )) {
157
- flows = map(
158
- self $ flow_ids ,
159
- function (x ) OMLFlow $ new(x , cache = self $ cache_dir , test_server = self $ test_server )
160
- )
161
- private $ .flows = make_flow_table(flows )
162
- }
163
- return (private $ .flows )
164
- },
165
- # ' @field data (`data.table()`)
166
- # ' A data.table summarizing the datasets included in the collection.
167
- data = function () {
168
- if (is.null(private $ .data )) {
169
- datasets = map(
170
- self $ data_ids ,
171
- function (x ) OMLData $ new(x , cache = self $ cache_dir , parquet = self $ parquet ,
172
- test_server = self $ test_server
173
- )
174
- )
175
- private $ .data = make_dataset_table(datasets )
176
- }
177
- return (private $ .data )
178
- },
179
- # ' @field tasks (`data.table()`)
180
- # ' A data.table summarizing the tasks included in the collection.
181
- tasks = function () {
182
- if (is.null(private $ .tasks )) {
183
- tasks = map(
184
- self $ task_ids ,
185
- function (x ) OMLTask $ new(x , cache = self $ cache_dir , parquet = self $ parquet ,
186
- test_server = self $ test_server
187
- )
188
- )
189
- private $ .tasks = make_task_table(tasks )
190
- }
191
- return (private $ .tasks )
192
- }
109
+ task_ids = function () self $ desc $ task $ task_id
193
110
),
194
111
private = list (
195
112
.runs = NULL ,
@@ -205,87 +122,25 @@ OMLCollection = R6Class("OMLCollection",
205
122
# ' @export
206
123
as_benchmark_result.OMLCollection = function (x , ... ) {
207
124
assert_true(x $ main_entity_type == " run" )
208
- rrs = map(x $ runs [[ " run " ]], as_resample_result )
125
+ rrs = map(x $ run_ids , function ( id ) as_resample_result( OMLRun $ new( id , ... )) )
209
126
bmr = as_benchmark_result(invoke(c , .args = rrs ))
210
127
return (bmr )
211
128
}
212
129
213
130
# ' @importFrom mlr3 as_tasks
214
131
# ' @export
215
132
as_tasks.OMLCollection = function (x , ... ) {
216
- map(x $ tasks [[" task" ]], as_task , ... )
217
- }
218
-
219
- # ' @importFrom mlr3 as_learners
220
- # ' @export
221
- as_learners.OMLCollection = function (x , ... ) {
222
- map(x $ flows [[" flow" ]], as_learner , ... )
133
+ map(x $ task_ids , function (id ) tsk(" oml" , task_id = id , ... ))
223
134
}
224
135
225
136
# ' @importFrom mlr3 as_resamplings
226
137
# ' @export
227
138
as_resamplings.OMLCollection = function (x , ... ) {
228
- map(x $ tasks [[" task" ]], as_resampling , ... )
229
- }
230
-
231
- make_task_table = function (tasks ) {
232
- g = function (task ) {
233
- list (
234
- id = task $ id ,
235
- task = list (task ),
236
- data = as_short_string(task $ data $ name ),
237
- task_type = task $ task_type ,
238
- target = tryCatch(task $ target_names , error = function (x ) NA_character_ ), # can have length > 1
239
- nrow = as.integer(task $ data $ quality(" NumberOfInstances" )),
240
- ncol = task $ data $ quality(" NumberOfFeatures" ),
241
- missing = task $ data $ quality(" NumberOfMissingValues" ),
242
- numeric = task $ data $ quality(" NumberOfNumericFeatures" ),
243
- symbolic = task $ data $ quality(" NumberOfSymbolicFeatures" ),
244
- binary = task $ data $ quality(" NumberOfBinaryFeatures" ),
245
- task_splits = task $ estimation_procedure $ type %??% " none"
246
- )
247
- }
248
- setkeyv(map_dtr(tasks , g , .fill = TRUE ), " id" )[]
249
- }
250
-
251
- make_flow_table = function (flows ) {
252
- g = function (flow ) {
253
- list (
254
- id = flow $ id ,
255
- flow = list (flow ),
256
- name = as_short_string(flow $ name )
257
- )
258
- }
259
- setkeyv(map_dtr(flows , g ), " id" )[]
260
- }
261
-
262
- make_dataset_table = function (datasets ) {
263
- g = function (dataset ) {
264
- list (
265
- id = dataset $ id ,
266
- data = list (dataset ),
267
- name = dataset $ name ,
268
- nrow = as.integer(dataset $ quality(" NumberOfInstances" )),
269
- ncol = dataset $ quality(" NumberOfFeatures" ),
270
- missing = dataset $ quality(" NumberOfMissingValues" ),
271
- numeric = dataset $ quality(" NumberOfNumericFeatures" ),
272
- symbolic = dataset $ quality(" NumberOfSymbolicFeatures" ),
273
- binary = dataset $ quality(" NumberOfBinaryFeatures" )
274
- )
275
- }
276
- setkeyv(map_dtr(datasets , g , .fill = TRUE ), " id" )[]
139
+ map(x $ task_ids , function (id ) rsmp(" oml" , task_id = id , ... ))
277
140
}
278
141
279
- make_run_table = function (runs ) {
280
- g = function (run ) {
281
- list (
282
- id = run $ id ,
283
- run = list (run ),
284
- task_type = run $ task_type ,
285
- data = as_short_string(run $ desc $ input_data $ dataset $ name ),
286
- flow = as_short_string(run $ desc $ flow_name ),
287
- task_splits = run $ task $ estimation_procedure $ type
288
- )
289
- }
290
- setkeyv(map_dtr(runs , g , .fill = TRUE ), " id" )[]
142
+ # ' @importFrom mlr3 as_learners
143
+ # ' @export
144
+ as_learners.OMLCollection = function (x , ... ) {
145
+ map(x $ flow_ids , function (id ) as_learner(OMLFlow $ new(id , ... )))
291
146
}
0 commit comments