diff --git a/R-package/.Rbuildignore b/R-package/.Rbuildignore new file mode 100644 index 000000000000..17cfbdb8c5cd --- /dev/null +++ b/R-package/.Rbuildignore @@ -0,0 +1 @@ +^build_package.R$ diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index c87776150d95..cb23e6aa0726 100755 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -5,13 +5,16 @@ Version: 2.1.0 Date: 2018-01-25 Author: Guolin Ke Maintainer: Guolin Ke -Description: LightGBM is a gradient boosting framework that uses tree based learning algorithms. +Description: Tree based algorithms can be improved by introducing boosting frameworks. LightGBM is one such framework, and this package offers an R interface to work with it. It is designed to be distributed and efficient with the following advantages: 1. Faster training speed and higher efficiency. 2. Lower memory usage. 3. Better accuracy. 4. Parallel learning supported. 5. Capable of handling large-scale data. + In recognition of these advantages, LightGBM has been widely used in many winning solutions of machine learning competitions. + + Comparison experiments on public datasets suggest that LightGBM can outperform existing boosting frameworks on both efficiency and accuracy, with significantly lower memory consumption. In addition, parallel experiments suggest that in certain circumstances, LightGBM can achieve a linear speed-up in training time by using multiple machines. 
License: MIT + file LICENSE URL: https://github.com/Microsoft/LightGBM BugReports: https://github.com/Microsoft/LightGBM/issues @@ -30,6 +33,7 @@ Depends: R (>= 3.0), R6 (>= 2.0) Imports: + graphics, methods, Matrix (>= 1.1-0), data.table (>= 1.9.6), diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE index cf0a27d72819..c43192c8e05d 100755 --- a/R-package/NAMESPACE +++ b/R-package/NAMESPACE @@ -38,6 +38,9 @@ export(slice) import(methods) importFrom(R6,R6Class) importFrom(data.table,":=") +importFrom(data.table,set) +importFrom(graphics,barplot) +importFrom(graphics,par) importFrom(magrittr,"%>%") importFrom(magrittr,"%T>%") useDynLib(lib_lightgbm) diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R index 1e609194a5bd..b302154eaed6 100644 --- a/R-package/R/lgb.Booster.R +++ b/R-package/R/lgb.Booster.R @@ -619,7 +619,8 @@ Booster <- R6Class( #' @param header only used for prediction for text file. True if text file has header #' @param reshape whether to reshape the vector of predictions to a matrix form when there are several #' prediction outputs per case. - +#' @param ... Additional named arguments passed to the \code{predict()} method of +#' the \code{lgb.Booster} object passed to \code{object}. #' @return #' For regression or binary classification, it returns a vector of length \code{nrows(data)}. 
#' For multiclass classification, either a \code{num_class * nrows(data)} vector or diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R index 4a62965ddf84..09bb1ff7fe9f 100644 --- a/R-package/R/lgb.cv.R +++ b/R-package/R/lgb.cv.R @@ -16,10 +16,8 @@ CVBooster <- R6Class( ) ) -#' Main CV logic for LightGBM -#' -#' Main CV logic for LightGBM -#' +#' @title Main CV logic for LightGBM +#' @name lgb.cv #' @param params List of parameters #' @param data a \code{lgb.Dataset} object, used for CV #' @param nrounds number of CV rounds diff --git a/R-package/R/lgb.model.dt.tree.R b/R-package/R/lgb.model.dt.tree.R index 52cb8e1d6454..4c0974968e43 100644 --- a/R-package/R/lgb.model.dt.tree.R +++ b/R-package/R/lgb.model.dt.tree.R @@ -3,6 +3,8 @@ #' Parse a LightGBM model json dump into a \code{data.table} structure. #' #' @param model object of class \code{lgb.Booster} +#' @param num_iteration number of iterations you want to predict with. NULL or +#' <= 0 means use best iteration #' #' @return #' A \code{data.table} with detailed information about model trees' nodes and leafs. 
diff --git a/R-package/R/lgb.plot.importance.R b/R-package/R/lgb.plot.importance.R index 751251ac6e2c..3149d2f6c5c1 100644 --- a/R-package/R/lgb.plot.importance.R +++ b/R-package/R/lgb.plot.importance.R @@ -31,7 +31,7 @@ #' tree_imp <- lgb.importance(model, percentage = TRUE) #' lgb.plot.importance(tree_imp, top_n = 10, measure = "Gain") #' } -#' +#' @importFrom graphics barplot par #' @export lgb.plot.importance <- function(tree_imp, top_n = 10, @@ -54,22 +54,24 @@ lgb.plot.importance <- function(tree_imp, } # Refresh plot - op <- par(no.readonly = TRUE) - on.exit(par(op)) + op <- graphics::par(no.readonly = TRUE) + on.exit(graphics::par(op)) # Do some magic plotting - par(mar = op$mar %>% magrittr::inset(., 2, left_margin)) + graphics::par(mar = op$mar %>% magrittr::inset(., 2, left_margin)) # Do plot tree_imp[.N:1, - barplot(height = get(measure), - names.arg = Feature, - horiz = TRUE, - border = NA, - main = "Feature Importance", - xlab = measure, - cex.names = cex, - las = 1)] + graphics::barplot( + height = get(measure), + names.arg = Feature, + horiz = TRUE, + border = NA, + main = "Feature Importance", + xlab = measure, + cex.names = cex, + las = 1 + )] # Return invisibly invisible(tree_imp) diff --git a/R-package/R/lgb.plot.interpretation.R b/R-package/R/lgb.plot.interpretation.R index e79cd8581f98..680b52f226a6 100644 --- a/R-package/R/lgb.plot.interpretation.R +++ b/R-package/R/lgb.plot.interpretation.R @@ -36,7 +36,7 @@ #' tree_interpretation <- lgb.interprete(model, test$data, 1:5) #' lgb.plot.interpretation(tree_interpretation[[1]], top_n = 10) #' } -#' +#' @importFrom graphics barplot par #' @export lgb.plot.interpretation <- function(tree_interpretation_dt, top_n = 10, @@ -48,11 +48,11 @@ lgb.plot.interpretation <- function(tree_interpretation_dt, num_class <- ncol(tree_interpretation_dt) - 1 # Refresh plot - op <- par(no.readonly = TRUE) - on.exit(par(op)) + op <- graphics::par(no.readonly = TRUE) + on.exit(graphics::par(op)) # Do some magic 
plotting - par(mar = op$mar %>% magrittr::inset(., 1:3, c(3, left_margin, 2))) + graphics::par(mar = op$mar %>% magrittr::inset(., 1:3, c(3, left_margin, 2))) # Check for number of classes if (num_class == 1) { @@ -70,7 +70,7 @@ lgb.plot.interpretation <- function(tree_interpretation_dt, ncol = cols, nrow = ceiling(num_class / cols)) # Shape output - par(mfcol = c(nrow(layout_mat), ncol(layout_mat))) + graphics::par(mfcol = c(nrow(layout_mat), ncol(layout_mat))) # Loop throughout all classes for (i in seq_len(num_class)) { @@ -102,14 +102,16 @@ multiple.tree.plot.interpretation <- function(tree_interpretation, # Do plot tree_interpretation[.N:1, - barplot(height = Contribution, - names.arg = Feature, - horiz = TRUE, - col = ifelse(Contribution > 0, "firebrick", "steelblue"), - border = NA, - main = title, - cex.names = cex, - las = 1)] + graphics::barplot( + height = Contribution, + names.arg = Feature, + horiz = TRUE, + col = ifelse(Contribution > 0, "firebrick", "steelblue"), + border = NA, + main = title, + cex.names = cex, + las = 1 + )] # Return invisibly invisible(NULL) diff --git a/R-package/R/lgb.train.R b/R-package/R/lgb.train.R index cd986020019f..262d97f7858c 100644 --- a/R-package/R/lgb.train.R +++ b/R-package/R/lgb.train.R @@ -1,5 +1,5 @@ -#' Main training logic for LightGBM -#' +#' @title Main training logic for LightGBM +#' @name lgb.train #' @param params List of parameters #' @param data a \code{lgb.Dataset} object, used for training #' @param nrounds number of training rounds diff --git a/R-package/R/lgb.unloader.R b/R-package/R/lgb.unloader.R index 0624e7847eec..e4fb1050fdac 100644 --- a/R-package/R/lgb.unloader.R +++ b/R-package/R/lgb.unloader.R @@ -2,7 +2,7 @@ #' #' Attempts to unload LightGBM packages so you can remove objects cleanly without having to restart R. This is useful for instance if an object becomes stuck for no apparent reason and you do not want to restart R to fix the lost object. 
#' -#' @param restart Whether to reload \code{LightGBM} immediately after detaching from R. Defaults to \code{TRUE} which means automatically reload \code{LightGBM} once unloading is performed. +#' @param restore Whether to reload \code{LightGBM} immediately after detaching from R. Defaults to \code{TRUE} which means automatically reload \code{LightGBM} once unloading is performed. #' @param wipe Whether to wipe all \code{lgb.Dataset} and \code{lgb.Booster} from the global environment. Defaults to \code{FALSE} which means to not remove them. #' @param envir The environment to perform wiping on if \code{wipe == TRUE}. Defaults to \code{.GlobalEnv} which is the global environment. #' diff --git a/R-package/R/lightgbm.R b/R-package/R/lightgbm.R index 66feca8efdea..f30d49e606c9 100644 --- a/R-package/R/lightgbm.R +++ b/R-package/R/lightgbm.R @@ -122,5 +122,28 @@ NULL # Various imports #' @import methods #' @importFrom R6 R6Class -#' @useDynLib lightgbm +#' @useDynLib lib_lightgbm NULL + +# Suppress false positive warnings from R CMD CHECK about +# "unrecognized global variable" +globalVariables(c( + "." + , ".N" + , ".SD" + , "Contribution" + , "Cover" + , "Feature" + , "Frequency" + , "Gain" + , "internal_count" + , "internal_value" + , "leaf_index" + , "leaf_parent" + , "leaf_value" + , "node_parent" + , "split_feature" + , "split_gain" + , "split_index" + , "tree_index" +)) diff --git a/R-package/man/lgb.model.dt.tree.Rd b/R-package/man/lgb.model.dt.tree.Rd index 21d1a0a9c32d..9998a504b59e 100644 --- a/R-package/man/lgb.model.dt.tree.Rd +++ b/R-package/man/lgb.model.dt.tree.Rd @@ -8,6 +8,9 @@ lgb.model.dt.tree(model, num_iteration = NULL) } \arguments{ \item{model}{object of class \code{lgb.Booster}} + +\item{num_iteration}{number of iterations you want to predict with. NULL or +<= 0 means use best iteration} } \value{ A \code{data.table} with detailed information about model trees' nodes and leafs. 
@@ -25,6 +28,7 @@ The columns of the \code{data.table} are: \item \code{split_gain}: Split gain of a node \item \code{threshold}: Spliting threshold value of a node \item \code{decision_type}: Decision type of a node + \item \code{default_left}: Determine how to handle NA value, TRUE -> Left, FALSE -> Right \item \code{internal_value}: Node value \item \code{internal_count}: The number of observation collected by a node \item \code{leaf_value}: Leaf value diff --git a/R-package/man/lgb.plot.importance.Rd b/R-package/man/lgb.plot.importance.Rd index 941e81cc317b..a54530574648 100644 --- a/R-package/man/lgb.plot.importance.Rd +++ b/R-package/man/lgb.plot.importance.Rd @@ -44,5 +44,4 @@ model <- lgb.train(params, dtrain, 20) tree_imp <- lgb.importance(model, percentage = TRUE) lgb.plot.importance(tree_imp, top_n = 10, measure = "Gain") } - } diff --git a/R-package/man/lgb.plot.interpretation.Rd b/R-package/man/lgb.plot.interpretation.Rd index 5356a6ba4df8..3da9d8ae50c0 100644 --- a/R-package/man/lgb.plot.interpretation.Rd +++ b/R-package/man/lgb.plot.interpretation.Rd @@ -49,5 +49,4 @@ model <- lgb.train(params, dtrain, 20) tree_interpretation <- lgb.interprete(model, test$data, 1:5) lgb.plot.interpretation(tree_interpretation[[1]], top_n = 10) } - } diff --git a/R-package/man/lgb.prepare.Rd b/R-package/man/lgb.prepare.Rd index 8b309c4e5a8d..14a88efd2473 100644 --- a/R-package/man/lgb.prepare.Rd +++ b/R-package/man/lgb.prepare.Rd @@ -1,51 +1,51 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lgb.prepare.R -\name{lgb.prepare} -\alias{lgb.prepare} -\title{Data preparator for LightGBM datasets (numeric)} -\usage{ -lgb.prepare(data) -} -\arguments{ -\item{data}{A data.frame or data.table to prepare.} -} -\value{ -The cleaned dataset. It must be converted to a matrix format (\code{as.matrix}) for input in lgb.Dataset. -} -\description{ -Attempts to prepare a clean dataset to prepare to put in a lgb.Dataset. 
Factors and characters are converted to numeric without integers. Please use \code{lgb.prepare_rules} if you want to apply this transformation to other datasets. -} -\examples{ -\dontrun{ -library(lightgbm) -data(iris) - -str(iris) -# 'data.frame': 150 obs. of 5 variables: -# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... -# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... -# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... -# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... -# $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ... - -str(lgb.prepare(data = iris)) # Convert all factors/chars to numeric -# 'data.frame': 150 obs. of 5 variables: -# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... -# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... -# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... -# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... -# $ Species : num 1 1 1 1 1 1 1 1 1 1 ... - -# When lightgbm package is installed, and you do not want to load it -# You can still use the function! -lgb.unloader() -str(lightgbm::lgb.prepare(data = iris)) -# 'data.frame': 150 obs. of 5 variables: -# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... -# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... -# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... -# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... -# $ Species : num 1 1 1 1 1 1 1 1 1 1 ... -} - -} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lgb.prepare.R +\name{lgb.prepare} +\alias{lgb.prepare} +\title{Data preparator for LightGBM datasets (numeric)} +\usage{ +lgb.prepare(data) +} +\arguments{ +\item{data}{A data.frame or data.table to prepare.} +} +\value{ +The cleaned dataset. It must be converted to a matrix format (\code{as.matrix}) for input in lgb.Dataset. 
+} +\description{ +Attempts to prepare a clean dataset to prepare to put in a lgb.Dataset. Factors and characters are converted to numeric without integers. Please use \code{lgb.prepare_rules} if you want to apply this transformation to other datasets. +} +\examples{ +\dontrun{ +library(lightgbm) +data(iris) + +str(iris) +# 'data.frame': 150 obs. of 5 variables: +# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +# $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ... + +str(lgb.prepare(data = iris)) # Convert all factors/chars to numeric +# 'data.frame': 150 obs. of 5 variables: +# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +# $ Species : num 1 1 1 1 1 1 1 1 1 1 ... + +# When lightgbm package is installed, and you do not want to load it +# You can still use the function! +lgb.unloader() +str(lightgbm::lgb.prepare(data = iris)) +# 'data.frame': 150 obs. of 5 variables: +# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +# $ Species : num 1 1 1 1 1 1 1 1 1 1 ... 
+} + +} diff --git a/R-package/man/lgb.prepare2.Rd b/R-package/man/lgb.prepare2.Rd index c042d19c1d75..cb84885b4f9b 100644 --- a/R-package/man/lgb.prepare2.Rd +++ b/R-package/man/lgb.prepare2.Rd @@ -1,52 +1,52 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lgb.prepare2.R -\name{lgb.prepare2} -\alias{lgb.prepare2} -\title{Data preparator for LightGBM datasets (integer)} -\usage{ -lgb.prepare2(data) -} -\arguments{ -\item{data}{A data.frame or data.table to prepare.} -} -\value{ -The cleaned dataset. It must be converted to a matrix format (\code{as.matrix}) for input in lgb.Dataset. -} -\description{ -Attempts to prepare a clean dataset to prepare to put in a lgb.Dataset. Factors and characters are converted to numeric (specifically: integer). Please use \code{lgb.prepare_rules2} if you want to apply this transformation to other datasets. This is useful if you have a specific need for integer dataset instead of numeric dataset. Note that there are programs which do not support integer-only input. Consider this as a half memory technique which is dangerous, especially for LightGBM. -} -\examples{ -\dontrun{ -library(lightgbm) -data(iris) - -str(iris) -# 'data.frame': 150 obs. of 5 variables: -# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... -# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... -# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... -# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... -# $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ... - -str(lgb.prepare2(data = iris)) # Convert all factors/chars to integer -# 'data.frame': 150 obs. of 5 variables: -# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... -# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... -# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... -# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... -# $ Species : int 1 1 1 1 1 1 1 1 1 1 ... 
- -# When lightgbm package is installed, and you do not want to load it -# You can still use the function! -lgb.unloader() -str(lightgbm::lgb.prepare2(data = iris)) -# 'data.frame': 150 obs. of 5 variables: -# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... -# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... -# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... -# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... -# $ Species : int 1 1 1 1 1 1 1 1 1 1 ... - -} - -} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lgb.prepare2.R +\name{lgb.prepare2} +\alias{lgb.prepare2} +\title{Data preparator for LightGBM datasets (integer)} +\usage{ +lgb.prepare2(data) +} +\arguments{ +\item{data}{A data.frame or data.table to prepare.} +} +\value{ +The cleaned dataset. It must be converted to a matrix format (\code{as.matrix}) for input in lgb.Dataset. +} +\description{ +Attempts to prepare a clean dataset to prepare to put in a lgb.Dataset. Factors and characters are converted to numeric (specifically: integer). Please use \code{lgb.prepare_rules2} if you want to apply this transformation to other datasets. This is useful if you have a specific need for integer dataset instead of numeric dataset. Note that there are programs which do not support integer-only input. Consider this as a half memory technique which is dangerous, especially for LightGBM. +} +\examples{ +\dontrun{ +library(lightgbm) +data(iris) + +str(iris) +# 'data.frame': 150 obs. of 5 variables: +# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +# $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ... + +str(lgb.prepare2(data = iris)) # Convert all factors/chars to integer +# 'data.frame': 150 obs. 
of 5 variables: +# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +# $ Species : int 1 1 1 1 1 1 1 1 1 1 ... + +# When lightgbm package is installed, and you do not want to load it +# You can still use the function! +lgb.unloader() +str(lightgbm::lgb.prepare2(data = iris)) +# 'data.frame': 150 obs. of 5 variables: +# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +# $ Species : int 1 1 1 1 1 1 1 1 1 1 ... + +} + +} diff --git a/R-package/man/lgb.prepare_rules.Rd b/R-package/man/lgb.prepare_rules.Rd index b3705da58075..9290c8d8e13c 100644 --- a/R-package/man/lgb.prepare_rules.Rd +++ b/R-package/man/lgb.prepare_rules.Rd @@ -1,81 +1,81 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lgb.prepare_rules.R -\name{lgb.prepare_rules} -\alias{lgb.prepare_rules} -\title{Data preparator for LightGBM datasets with rules (numeric)} -\usage{ -lgb.prepare_rules(data, rules = NULL) -} -\arguments{ -\item{data}{A data.frame or data.table to prepare.} - -\item{rules}{A set of rules from the data preparator, if already used.} -} -\value{ -A list with the cleaned dataset (\code{data}) and the rules (\code{rules}). The data must be converted to a matrix format (\code{as.matrix}) for input in lgb.Dataset. -} -\description{ -Attempts to prepare a clean dataset to prepare to put in a lgb.Dataset. Factors and characters are converted to numeric. In addition, keeps rules created so you can convert other datasets using this converter. -} -\examples{ -\dontrun{ -library(lightgbm) -data(iris) - -str(iris) -# 'data.frame': 150 obs. 
of 5 variables: -# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... -# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... -# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... -# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... -# $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ... - -new_iris <- lgb.prepare_rules(data = iris) # Autoconverter -str(new_iris$data) -# 'data.frame': 150 obs. of 5 variables: -# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... -# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... -# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... -# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... -# $ Species : num 1 1 1 1 1 1 1 1 1 1 ... - -data(iris) # Erase iris dataset -iris$Species[1] <- "NEW FACTOR" # Introduce junk factor (NA) -# Warning message: -# In `[<-.factor`(`*tmp*`, 1, value = c(NA, 1L, 1L, 1L, 1L, 1L, 1L, : -# invalid factor level, NA generated - -# Use conversion using known rules -# Unknown factors become 0, excellent for sparse datasets -newer_iris <- lgb.prepare_rules(data = iris, rules = new_iris$rules) - -# Unknown factor is now zero, perfect for sparse datasets -newer_iris$data[1, ] # Species became 0 as it is an unknown factor -# Sepal.Length Sepal.Width Petal.Length Petal.Width Species -# 1 5.1 3.5 1.4 0.2 0 - -newer_iris$data[1, 5] <- 1 # Put back real initial value - -# Is the newly created dataset equal? YES! -all.equal(new_iris$data, newer_iris$data) -# [1] TRUE - -# Can we test our own rules? -data(iris) # Erase iris dataset - -# We remapped values differently -personal_rules <- list(Species = c("setosa" = 3, - "versicolor" = 2, - "virginica" = 1)) -newest_iris <- lgb.prepare_rules(data = iris, rules = personal_rules) -str(newest_iris$data) # SUCCESS! -# 'data.frame': 150 obs. of 5 variables: -# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... 
-# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... -# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... -# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... -# $ Species : num 3 3 3 3 3 3 3 3 3 3 ... - -} - -} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lgb.prepare_rules.R +\name{lgb.prepare_rules} +\alias{lgb.prepare_rules} +\title{Data preparator for LightGBM datasets with rules (numeric)} +\usage{ +lgb.prepare_rules(data, rules = NULL) +} +\arguments{ +\item{data}{A data.frame or data.table to prepare.} + +\item{rules}{A set of rules from the data preparator, if already used.} +} +\value{ +A list with the cleaned dataset (\code{data}) and the rules (\code{rules}). The data must be converted to a matrix format (\code{as.matrix}) for input in lgb.Dataset. +} +\description{ +Attempts to prepare a clean dataset to prepare to put in a lgb.Dataset. Factors and characters are converted to numeric. In addition, keeps rules created so you can convert other datasets using this converter. +} +\examples{ +\dontrun{ +library(lightgbm) +data(iris) + +str(iris) +# 'data.frame': 150 obs. of 5 variables: +# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +# $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ... + +new_iris <- lgb.prepare_rules(data = iris) # Autoconverter +str(new_iris$data) +# 'data.frame': 150 obs. of 5 variables: +# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +# $ Species : num 1 1 1 1 1 1 1 1 1 1 ... 
+ +data(iris) # Erase iris dataset +iris$Species[1] <- "NEW FACTOR" # Introduce junk factor (NA) +# Warning message: +# In `[<-.factor`(`*tmp*`, 1, value = c(NA, 1L, 1L, 1L, 1L, 1L, 1L, : +# invalid factor level, NA generated + +# Use conversion using known rules +# Unknown factors become 0, excellent for sparse datasets +newer_iris <- lgb.prepare_rules(data = iris, rules = new_iris$rules) + +# Unknown factor is now zero, perfect for sparse datasets +newer_iris$data[1, ] # Species became 0 as it is an unknown factor +# Sepal.Length Sepal.Width Petal.Length Petal.Width Species +# 1 5.1 3.5 1.4 0.2 0 + +newer_iris$data[1, 5] <- 1 # Put back real initial value + +# Is the newly created dataset equal? YES! +all.equal(new_iris$data, newer_iris$data) +# [1] TRUE + +# Can we test our own rules? +data(iris) # Erase iris dataset + +# We remapped values differently +personal_rules <- list(Species = c("setosa" = 3, + "versicolor" = 2, + "virginica" = 1)) +newest_iris <- lgb.prepare_rules(data = iris, rules = personal_rules) +str(newest_iris$data) # SUCCESS! +# 'data.frame': 150 obs. of 5 variables: +# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +# $ Species : num 3 3 3 3 3 3 3 3 3 3 ... 
+ +} + +} diff --git a/R-package/man/lgb.prepare_rules2.Rd b/R-package/man/lgb.prepare_rules2.Rd index 0b0e799cc7d8..44414c834b97 100644 --- a/R-package/man/lgb.prepare_rules2.Rd +++ b/R-package/man/lgb.prepare_rules2.Rd @@ -1,81 +1,81 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lgb.prepare_rules2.R -\name{lgb.prepare_rules2} -\alias{lgb.prepare_rules2} -\title{Data preparator for LightGBM datasets with rules (integer)} -\usage{ -lgb.prepare_rules2(data, rules = NULL) -} -\arguments{ -\item{data}{A data.frame or data.table to prepare.} - -\item{rules}{A set of rules from the data preparator, if already used.} -} -\value{ -A list with the cleaned dataset (\code{data}) and the rules (\code{rules}). The data must be converted to a matrix format (\code{as.matrix}) for input in lgb.Dataset. -} -\description{ -Attempts to prepare a clean dataset to prepare to put in a lgb.Dataset. Factors and characters are converted to numeric (specifically: integer). In addition, keeps rules created so you can convert other datasets using this converter. This is useful if you have a specific need for integer dataset instead of numeric dataset. Note that there are programs which do not support integer-only input. Consider this as a half memory technique which is dangerous, especially for LightGBM. -} -\examples{ -\dontrun{ -library(lightgbm) -data(iris) - -str(iris) -# 'data.frame': 150 obs. of 5 variables: -# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... -# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... -# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... -# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... -# $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ... - -new_iris <- lgb.prepare_rules2(data = iris) # Autoconverter -str(new_iris$data) -# 'data.frame': 150 obs. of 5 variables: -# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... 
-# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... -# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... -# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... -# $ Species : int 1 1 1 1 1 1 1 1 1 1 ... - -data(iris) # Erase iris dataset -iris$Species[1] <- "NEW FACTOR" # Introduce junk factor (NA) -# Warning message: -# In `[<-.factor`(`*tmp*`, 1, value = c(NA, 1L, 1L, 1L, 1L, 1L, 1L, : -# invalid factor level, NA generated - -# Use conversion using known rules -# Unknown factors become 0, excellent for sparse datasets -newer_iris <- lgb.prepare_rules2(data = iris, rules = new_iris$rules) - -# Unknown factor is now zero, perfect for sparse datasets -newer_iris$data[1, ] # Species became 0 as it is an unknown factor -# Sepal.Length Sepal.Width Petal.Length Petal.Width Species -# 1 5.1 3.5 1.4 0.2 0 - -newer_iris$data[1, 5] <- 1 # Put back real initial value - -# Is the newly created dataset equal? YES! -all.equal(new_iris$data, newer_iris$data) -# [1] TRUE - -# Can we test our own rules? -data(iris) # Erase iris dataset - -# We remapped values differently -personal_rules <- list(Species = c("setosa" = 3L, - "versicolor" = 2L, - "virginica" = 1L)) -newest_iris <- lgb.prepare_rules2(data = iris, rules = personal_rules) -str(newest_iris$data) # SUCCESS! -# 'data.frame': 150 obs. of 5 variables: -# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... -# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... -# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... -# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... -# $ Species : int 3 3 3 3 3 3 3 3 3 3 ... 
- -} - -} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lgb.prepare_rules2.R +\name{lgb.prepare_rules2} +\alias{lgb.prepare_rules2} +\title{Data preparator for LightGBM datasets with rules (integer)} +\usage{ +lgb.prepare_rules2(data, rules = NULL) +} +\arguments{ +\item{data}{A data.frame or data.table to prepare.} + +\item{rules}{A set of rules from the data preparator, if already used.} +} +\value{ +A list with the cleaned dataset (\code{data}) and the rules (\code{rules}). The data must be converted to a matrix format (\code{as.matrix}) for input in lgb.Dataset. +} +\description{ +Attempts to prepare a clean dataset to prepare to put in a lgb.Dataset. Factors and characters are converted to numeric (specifically: integer). In addition, keeps rules created so you can convert other datasets using this converter. This is useful if you have a specific need for integer dataset instead of numeric dataset. Note that there are programs which do not support integer-only input. Consider this as a half memory technique which is dangerous, especially for LightGBM. +} +\examples{ +\dontrun{ +library(lightgbm) +data(iris) + +str(iris) +# 'data.frame': 150 obs. of 5 variables: +# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +# $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ... + +new_iris <- lgb.prepare_rules2(data = iris) # Autoconverter +str(new_iris$data) +# 'data.frame': 150 obs. of 5 variables: +# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +# $ Species : int 1 1 1 1 1 1 1 1 1 1 ... 
+ +data(iris) # Erase iris dataset +iris$Species[1] <- "NEW FACTOR" # Introduce junk factor (NA) +# Warning message: +# In `[<-.factor`(`*tmp*`, 1, value = c(NA, 1L, 1L, 1L, 1L, 1L, 1L, : +# invalid factor level, NA generated + +# Use conversion using known rules +# Unknown factors become 0, excellent for sparse datasets +newer_iris <- lgb.prepare_rules2(data = iris, rules = new_iris$rules) + +# Unknown factor is now zero, perfect for sparse datasets +newer_iris$data[1, ] # Species became 0 as it is an unknown factor +# Sepal.Length Sepal.Width Petal.Length Petal.Width Species +# 1 5.1 3.5 1.4 0.2 0 + +newer_iris$data[1, 5] <- 1 # Put back real initial value + +# Is the newly created dataset equal? YES! +all.equal(new_iris$data, newer_iris$data) +# [1] TRUE + +# Can we test our own rules? +data(iris) # Erase iris dataset + +# We remapped values differently +personal_rules <- list(Species = c("setosa" = 3L, + "versicolor" = 2L, + "virginica" = 1L)) +newest_iris <- lgb.prepare_rules2(data = iris, rules = personal_rules) +str(newest_iris$data) # SUCCESS! +# 'data.frame': 150 obs. of 5 variables: +# $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +# $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +# $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +# $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +# $ Species : int 3 3 3 3 3 3 3 3 3 3 ... 
+ +} + +} diff --git a/R-package/man/lgb.train.Rd b/R-package/man/lgb.train.Rd index b72384935e19..ccfc93d57003 100644 --- a/R-package/man/lgb.train.Rd +++ b/R-package/man/lgb.train.Rd @@ -75,7 +75,7 @@ If early stopping occurs, the model will have 'best_iter' field} \item{callbacks}{list of callback functions List of callback functions that are applied at each iteration.} -\item{...}{other parameters, see parameters.md for more informations} +\item{...}{other parameters, see Parameters.rst for more information} \item{valids}{a list of \code{lgb.Dataset} objects, used for validation} @@ -135,7 +135,7 @@ If early stopping occurs, the model will have 'best_iter' field} \item{callbacks}{list of callback functions List of callback functions that are applied at each iteration.} -\item{...}{other parameters, see parameters.md for more informations} +\item{...}{other parameters, see Parameters.rst for more information} } \value{ a trained model \code{lgb.CVBooster}. @@ -143,10 +143,6 @@ a trained model \code{lgb.CVBooster}. a trained booster model \code{lgb.Booster}. } \description{ -Main CV logic for LightGBM - -Main training logic for LightGBM - Simple interface for training an lightgbm model. Its documentation is combined with lgb.train. } diff --git a/R-package/man/lgb.unloader.Rd b/R-package/man/lgb.unloader.Rd index 391462ece19e..9569eb5f5190 100644 --- a/R-package/man/lgb.unloader.Rd +++ b/R-package/man/lgb.unloader.Rd @@ -7,11 +7,11 @@ lgb.unloader(restore = TRUE, wipe = FALSE, envir = .GlobalEnv) } \arguments{ +\item{restore}{Whether to reload \code{LightGBM} immediately after detaching from R. Defaults to \code{TRUE} which means automatically reload \code{LightGBM} once unloading is performed.} + \item{wipe}{Whether to wipe all \code{lgb.Dataset} and \code{lgb.Booster} from the global environment. Defaults to \code{FALSE} which means to not remove them.} \item{envir}{The environment to perform wiping on if \code{wipe == TRUE}. 
Defaults to \code{.GlobalEnv} which is the global environment.} - -\item{restart}{Whether to reload \code{LightGBM} immediately after detaching from R. Defaults to \code{TRUE} which means automatically reload \code{LightGBM} once unloading is performed.} } \value{ NULL invisibly. diff --git a/R-package/man/predict.lgb.Booster.Rd b/R-package/man/predict.lgb.Booster.Rd index 820fa18a02b9..cca05eec42b9 100644 --- a/R-package/man/predict.lgb.Booster.Rd +++ b/R-package/man/predict.lgb.Booster.Rd @@ -25,6 +25,9 @@ logistic regression would result in predictions for log-odds instead of probabil \item{reshape}{whether to reshape the vector of predictions to a matrix form when there are several prediction outputs per case.} + +\item{...}{Additional named arguments passed to the \code{predict()} method of +the \code{lgb.Booster} object passed to \code{object}.} } \value{ For regression or binary classification, it returns a vector of length \code{nrows(data)}. diff --git a/R-package/src/Makevars b/R-package/src/Makevars index e69de29bb2d1..8b137891791f 100644 --- a/R-package/src/Makevars +++ b/R-package/src/Makevars @@ -0,0 +1 @@ + diff --git a/R-package/src/Makevars.win b/R-package/src/Makevars.win index e69de29bb2d1..8b137891791f 100644 --- a/R-package/src/Makevars.win +++ b/R-package/src/Makevars.win @@ -0,0 +1 @@ +