Skip to content

Commit 3e3e0fa

Browse files
authored
address various small issues (#120)
* address various small issues * improve printer * address case when cache folder does not exist anymore * fix CRAN NOTE: bit64 unused (is used internally by data.table) by calling a random function from the package. * remove examples (to avoid issues with CRAN when OpenML is not available) * Improve README * Remove some unnecessary fields from OMLCollection object * Removed `benchmark_grid_oml()` function. * fix: documentation of sugar functions Because ocl no longer has argument `cache`, they cannot share the same documentation file. * caching cannot be set on the instance level anymore * docs: better docu how to find regression tasks * feat: add download method to OpenML objects * increment cache version for parquet parquet files were sometimes missing some columns which (seems to be) addressed now * import from bit64 just to silence CRAN warnings * fix: "Additional issues" of CRAN CHECK https://www.stats.ox.ac.uk/pub/bdr/clang17/README.txt * docs: add tutorial * improve printer of task split * make examples link to other resources * rename file * add pkgdown worklow * improve docs * fix pkgdown workflow * update readme * skip test on cran * fix cran issue * fix readme (undefined link)
1 parent b15e959 commit 3e3e0fa

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

60 files changed

+1243
-1160
lines changed

.Rbuildignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,3 +21,5 @@
2121
^\.lintr$
2222
^\.pre-commit-config\.yaml$
2323
^cran-comments\.md$
24+
^vignettes/articles$
25+
^info$

.github/workflows/pkgdown.yml

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
# pkgdown workflow of the mlr3 ecosystem v0.1.0
2+
# https://github.com/mlr-org/actions
3+
on:
4+
push:
5+
branches:
6+
- main
7+
pull_request:
8+
branches:
9+
- main
10+
release:
11+
types:
12+
- published
13+
workflow_dispatch:
14+
15+
name: pkgdown
16+
17+
jobs:
18+
pkgdown:
19+
runs-on: ubuntu-latest
20+
21+
concurrency:
22+
group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
23+
env:
24+
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
25+
steps:
26+
- uses: actions/checkout@v3
27+
28+
- uses: r-lib/actions/setup-pandoc@v2
29+
30+
- uses: r-lib/actions/setup-r@v2
31+
32+
- uses: r-lib/actions/setup-r-dependencies@v2
33+
with:
34+
extra-packages: any::pkgdown, local::.
35+
needs: website
36+
37+
- name: Install template
38+
run: pak::pkg_install("mlr-org/mlr3pkgdowntemplate")
39+
shell: Rscript {0}
40+
41+
- name: Build site
42+
run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
43+
shell: Rscript {0}
44+
45+
- name: Deploy
46+
if: github.event_name != 'pull_request'
47+
uses: JamesIves/[email protected]
48+
with:
49+
clean: false
50+
branch: gh-pages
51+
folder: docs

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,5 @@ README.html
4343

4444
TODO.md
4545
lolz.R
46+
47+
*.html

DESCRIPTION

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
Package: mlr3oml
22
Title: Connector Between 'mlr3' and 'OpenML'
3-
Version: 0.7.2-9000
3+
Version: 0.8.0
44
Authors@R: c(
55
person("Michel", "Lang", , "[email protected]", role = "aut",
66
comment = c(ORCID = "0000-0001-9754-0393")),
@@ -45,3 +45,4 @@ Encoding: UTF-8
4545
NeedsCompilation: yes
4646
Roxygen: list(markdown = TRUE)
4747
RoxygenNote: 7.2.3
48+
Config/Needs/website: rmarkdown

NAMESPACE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ export(OMLFlow)
2020
export(OMLObject)
2121
export(OMLRun)
2222
export(OMLTask)
23-
export(benchmark_grid_oml)
2423
export(list_oml_data)
2524
export(list_oml_evaluations)
2625
export(list_oml_flows)
@@ -41,6 +40,7 @@ import(mlr3)
4140
import(mlr3misc)
4241
import(stringi)
4342
importFrom(R6,R6Class)
43+
importFrom(bit64,integer64)
4444
importFrom(methods,hasArg)
4545
importFrom(mlr3,as_benchmark_result)
4646
importFrom(mlr3,as_data_backend)

NEWS.md

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,18 @@
1-
# mlr3oml 0.7.2-9000
1+
# mlr3oml 0.8.0
2+
3+
* Listing functions don't return the tables invisibly anymore.
4+
* Address CRAN NOTE regarding unused bit64 import.
5+
* Improved the printer for all OpenML objects.
6+
* Removed `benchmark_grid_oml()`, which was already deprecated in release 0.7.2.
7+
* Removed the fields `runs`, `flows`, `data`, `tasks` from the `OMLCollection` class.
8+
Consequently, the `cache` option can no longer be set for `OMLCollection` objects,
9+
see the class documentation for more information.
10+
* Removed the examples, as they caused problems with CRAN checks when OpenML was unavailable.
11+
* Caching can no longer be specified at the instance level but only globally through
12+
the option `mlr3oml.cache`.
13+
* Added `$download()` method for all OML objects to fully download an object for offline usage.
14+
* Incremented the cache version for parquet data due to a change in OpenML.
15+
* Added an online tutorial for the package.
216

317
# mlr3oml 0.7.2
418

R/OMLCollection.R

Lines changed: 20 additions & 165 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,8 @@
2323
#' *Note*: All Benchmark Suites on OpenML are also collections.
2424
#'
2525
#' @section Caching:
26-
#' The OpenML collection itself cannot be not cached, this is because it can be modified in-place
27-
#' on the server, e.g. by adding or removing tasks or runs.
28-
#' The construction argument `cache` therefore only controls wether caching is applied to the
29-
#' OpenML objects that are contained in the collection.
26+
#' Because collections on OpenML can be modified (ids can be added), it is not possible to cache
27+
#' this object.
3028
#'
3129
#' @section mlr3 Integration:
3230
#' * Obtain a list of [mlr3::Task]s using [mlr3::as_tasks].
@@ -37,48 +35,26 @@
3735
#' @references
3836
#' `r format_bib("vanschoren2014")`
3937
#' @export
40-
#' @examples
41-
#' try({
42-
#' library("mlr3")
43-
#' # OpenML Run collection:
44-
#' run_collection = OMLCollection$new(id = 232)
45-
#' # using sugar
46-
#' run_collection = ocl(id = 232)
47-
#' print(run_collection)
48-
#'
49-
#' # OpenML task collection:
50-
#' task_collection = OMLCollection$new(id = 258)
51-
#' # using sugar
52-
#' task_collection = ocl(id = 258)
53-
#' print(task_collection)
54-
#' }, silent = TRUE)
38+
#' @template examples
5539
OMLCollection = R6Class("OMLCollection",
5640
inherit = OMLObject,
5741
public = list(
5842
#' @description
5943
#' Creates a new instance of this [R6][R6::R6Class] class.
6044
#'
6145
#' @template param_id
62-
#' @param cache (`logical(1)` | `character(1)`)\cr
63-
#' See field `cache` for an explanation of possible values.
64-
#' Defaults to value of option `"mlr3oml.cache"`, or `FALSE` if not set.
65-
#' The collection itself is not cached, this is because it can be modified in-place on OpenML,
66-
#' e.g. by adding or removing tasks or runs. This parameter therefore only controls whether
67-
#' the contained elements are cached when loaded, e.g. when accessing the included tasks.
68-
#' @template param_parquet
6946
#' @template param_test_server
7047
initialize = function(
7148
id,
72-
cache = cache_default(),
73-
parquet = parquet_default(),
7449
test_server = test_server_default()
7550
) {
76-
private$.parquet = assert_flag(parquet)
77-
super$initialize(id, cache, test_server, "collection")
51+
super$initialize(id, test_server, "collection")
7852
},
7953
#' @description
8054
#' Prints the object.
8155
print = function() {
56+
# trigger download first for better printing
57+
self$desc
8258
catf("<OMLCollection: %i> %s", self$id, as_short_string(self$name))
8359
catf(" * data: %i", length(self$data_ids))
8460
catf(" * tasks: %i", length(self$task_ids))
@@ -89,6 +65,12 @@ OMLCollection = R6Class("OMLCollection",
8965
if (self$test_server) {
9066
catf(" * Using test server")
9167
}
68+
},
69+
#' @description
70+
#' Downloads the whole object for offline usage.
71+
download = function() {
72+
self$desc
73+
invisible(self)
9274
}
9375
),
9476
active = list(
@@ -124,72 +106,7 @@ OMLCollection = R6Class("OMLCollection",
124106
run_ids = function() self$desc$runs$run_id,
125107
#' @field task_ids (`integer(n)`)\cr
126108
#' A vector containing the task ids of the collection.
127-
task_ids = function() self$desc$task$task_id,
128-
#' @field runs (`data.table()`)
129-
#' A data.table summarizing the runs included in the collection. Returns NULL for
130-
#' Task Collections.
131-
runs = function() {
132-
if (self$main_entity_type == "task") {
133-
messagef("Main entity type is task, returning NULL.")
134-
return(NULL)
135-
}
136-
if (is.null(private$.runs)) {
137-
runs = map(
138-
self$run_ids,
139-
function(x) OMLRun$new(x, cache = self$cache_dir, parquet = self$parquet,
140-
test_server = self$test_server
141-
)
142-
)
143-
144-
private$.runs = make_run_table(runs)
145-
}
146-
return(private$.runs)
147-
},
148-
#' @field flows (`data.table()`)
149-
#' A data.table summarizing the flows included in the collection. Returns `NULL` for
150-
#' Task Collections.
151-
flows = function() {
152-
if (self$main_entity_type == "task") {
153-
messagef("Main entity type is task, returning NULL.")
154-
return(NULL)
155-
}
156-
if (is.null(private$.flows)) {
157-
flows = map(
158-
self$flow_ids,
159-
function(x) OMLFlow$new(x, cache = self$cache_dir, test_server = self$test_server)
160-
)
161-
private$.flows = make_flow_table(flows)
162-
}
163-
return(private$.flows)
164-
},
165-
#' @field data (`data.table()`)
166-
#' A data.table summarizing the datasets included in the collection.
167-
data = function() {
168-
if (is.null(private$.data)) {
169-
datasets = map(
170-
self$data_ids,
171-
function(x) OMLData$new(x, cache = self$cache_dir, parquet = self$parquet,
172-
test_server = self$test_server
173-
)
174-
)
175-
private$.data = make_dataset_table(datasets)
176-
}
177-
return(private$.data)
178-
},
179-
#' @field tasks (`data.table()`)
180-
#' A data.table summarizing the tasks included in the collection.
181-
tasks = function() {
182-
if (is.null(private$.tasks)) {
183-
tasks = map(
184-
self$task_ids,
185-
function(x) OMLTask$new(x, cache = self$cache_dir, parquet = self$parquet,
186-
test_server = self$test_server
187-
)
188-
)
189-
private$.tasks = make_task_table(tasks)
190-
}
191-
return(private$.tasks)
192-
}
109+
task_ids = function() self$desc$task$task_id
193110
),
194111
private = list(
195112
.runs = NULL,
@@ -205,87 +122,25 @@ OMLCollection = R6Class("OMLCollection",
205122
#' @export
206123
as_benchmark_result.OMLCollection = function(x, ...) {
207124
assert_true(x$main_entity_type == "run")
208-
rrs = map(x$runs[["run"]], as_resample_result)
125+
rrs = map(x$run_ids, function(id) as_resample_result(OMLRun$new(id, ...)))
209126
bmr = as_benchmark_result(invoke(c, .args = rrs))
210127
return(bmr)
211128
}
212129

213130
#' @importFrom mlr3 as_tasks
214131
#' @export
215132
as_tasks.OMLCollection = function(x, ...) {
216-
map(x$tasks[["task"]], as_task, ...)
217-
}
218-
219-
#' @importFrom mlr3 as_learners
220-
#' @export
221-
as_learners.OMLCollection = function(x, ...) {
222-
map(x$flows[["flow"]], as_learner, ...)
133+
map(x$task_ids, function(id) tsk("oml", task_id = id, ...))
223134
}
224135

225136
#' @importFrom mlr3 as_resamplings
226137
#' @export
227138
as_resamplings.OMLCollection = function(x, ...) {
228-
map(x$tasks[["task"]], as_resampling, ...)
229-
}
230-
231-
make_task_table = function(tasks) {
232-
g = function(task) {
233-
list(
234-
id = task$id,
235-
task = list(task),
236-
data = as_short_string(task$data$name),
237-
task_type = task$task_type,
238-
target = tryCatch(task$target_names, error = function(x) NA_character_), # can have length > 1
239-
nrow = as.integer(task$data$quality("NumberOfInstances")),
240-
ncol = task$data$quality("NumberOfFeatures"),
241-
missing = task$data$quality("NumberOfMissingValues"),
242-
numeric = task$data$quality("NumberOfNumericFeatures"),
243-
symbolic = task$data$quality("NumberOfSymbolicFeatures"),
244-
binary = task$data$quality("NumberOfBinaryFeatures"),
245-
task_splits = task$estimation_procedure$type %??% "none"
246-
)
247-
}
248-
setkeyv(map_dtr(tasks, g, .fill = TRUE), "id")[]
249-
}
250-
251-
make_flow_table = function(flows) {
252-
g = function(flow) {
253-
list(
254-
id = flow$id,
255-
flow = list(flow),
256-
name = as_short_string(flow$name)
257-
)
258-
}
259-
setkeyv(map_dtr(flows, g), "id")[]
260-
}
261-
262-
make_dataset_table = function(datasets) {
263-
g = function(dataset) {
264-
list(
265-
id = dataset$id,
266-
data = list(dataset),
267-
name = dataset$name,
268-
nrow = as.integer(dataset$quality("NumberOfInstances")),
269-
ncol = dataset$quality("NumberOfFeatures"),
270-
missing = dataset$quality("NumberOfMissingValues"),
271-
numeric = dataset$quality("NumberOfNumericFeatures"),
272-
symbolic = dataset$quality("NumberOfSymbolicFeatures"),
273-
binary = dataset$quality("NumberOfBinaryFeatures")
274-
)
275-
}
276-
setkeyv(map_dtr(datasets, g, .fill = TRUE), "id")[]
139+
map(x$task_ids, function(id) rsmp("oml", task_id = id, ...))
277140
}
278141

279-
make_run_table = function(runs) {
280-
g = function(run) {
281-
list(
282-
id = run$id,
283-
run = list(run),
284-
task_type = run$task_type,
285-
data = as_short_string(run$desc$input_data$dataset$name),
286-
flow = as_short_string(run$desc$flow_name),
287-
task_splits = run$task$estimation_procedure$type
288-
)
289-
}
290-
setkeyv(map_dtr(runs, g, .fill = TRUE), "id")[]
142+
#' @importFrom mlr3 as_learners
143+
#' @export
144+
as_learners.OMLCollection = function(x, ...) {
145+
map(x$flow_ids, function(id) as_learner(OMLFlow$new(id, ...)))
291146
}

0 commit comments

Comments
 (0)