From 3714af3567f9161b77e5bfa1f2b4bfb52e2f4aeb Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 20 Feb 2025 02:08:31 +0800 Subject: [PATCH 1/6] [R] Check invalid input for the cv function. --- R-package/R/callbacks.R | 3 ++ R-package/R/xgb.cv.R | 7 ++++- R-package/tests/testthat/test_basic.R | 34 +++++++++++++++++++++++ R-package/tests/testthat/test_callbacks.R | 2 +- 4 files changed, 44 insertions(+), 2 deletions(-) diff --git a/R-package/R/callbacks.R b/R-package/R/callbacks.R index aa8d85246c7e..49a5d99eb480 100644 --- a/R-package/R/callbacks.R +++ b/R-package/R/callbacks.R @@ -647,6 +647,9 @@ xgb.cb.early.stop <- function( if (inherits(model, "xgb.Booster") && !length(evals)) { stop("For early stopping, 'evals' must have at least one element") } + if (!inherits(model, "xgb.Booster") && keep_all_iter) { + stop("`keep_all_iter` must be set to FALSE for cv.") + } env$begin_iteration <- begin_iteration return(NULL) }, diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R index a080a1be5695..94c022114f25 100644 --- a/R-package/R/xgb.cv.R +++ b/R-package/R/xgb.cv.R @@ -113,9 +113,13 @@ xgb.cv <- function(params = xgb.params(), data, nrounds, nfold, check.deprecation(deprecated_cv_params, match.call(), ...) stopifnot(inherits(data, "xgb.DMatrix")) + if (inherits(data, "xgb.DMatrix") && .Call(XGCheckNullPtr_R, data)) { stop("'data' is an invalid 'xgb.DMatrix' object. Must be constructed again.") } + if (inherits(data, "xgb.QuantileDMatrix")) { + stop("`xgb.QuantileDMatrix` is not yet supported for the cv function.") + } params <- check.booster.params(params) # TODO: should we deprecate the redundant 'metrics' parameter? @@ -171,7 +175,8 @@ xgb.cv <- function(params = xgb.params(), data, nrounds, nfold, xgb.cb.early.stop( early_stopping_rounds, maximize = maximize, - verbose = verbose + verbose = verbose, + keep_all_iter = FALSE ), as_first_elt = TRUE ) diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index 6bda4465c6df..55083af7aac8 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -402,6 +402,40 @@ test_that("xgb.cv works", { expect_false(is.null(cv$call)) }) +test_that("xgb.cv invalid inputs", { + data("mtcars") + y <- mtcars$mpg + x_df <- mtcars[, -1] + + expect_error( + cv <- xgb.cv( + data = xgb.QuantileDMatrix(x_df, label = y), + nfold = 5, + nrounds = 2, + params = xgb.params( + max_depth = 2, + nthread = n_threads + ) + ), + regexp = ".*QuantileDMatrix.*" + ) + expect_error( + cv <- xgb.cv( + data = xgb.DMatrix(x_df, label = y), + nfold = 5, + nrounds = 2, + params = xgb.params( + max_depth = 2, + nthread = n_threads, + ), + callbacks = list( + xgb.cb.early.stop(stopping_rounds = 3) + ) + ), + regexp = ".*keep_all_iter.*" + ) +}) + test_that("xgb.cv works with stratified folds", { dtrain <- xgb.DMatrix(train$data, label = train$label, nthread = n_threads) set.seed(314159) diff --git a/R-package/tests/testthat/test_callbacks.R b/R-package/tests/testthat/test_callbacks.R index 4cf551289525..1dcca775c1dc 100644 --- a/R-package/tests/testthat/test_callbacks.R +++ b/R-package/tests/testthat/test_callbacks.R @@ -352,7 +352,7 @@ test_that("early stopping xgb.cv works", { nfold = 5, nrounds = 20, early_stopping_rounds = 3, - maximize = FALSE + maximize = FALSE, ) }, "Stopping. Best iteration" From 5d03c6a69769afbfe52c25aab06fd972fed4bdce Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 20 Feb 2025 02:30:30 +0800 Subject: [PATCH 2/6] unnecessary change. --- R-package/tests/testthat/test_callbacks.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/tests/testthat/test_callbacks.R b/R-package/tests/testthat/test_callbacks.R index 1dcca775c1dc..4cf551289525 100644 --- a/R-package/tests/testthat/test_callbacks.R +++ b/R-package/tests/testthat/test_callbacks.R @@ -352,7 +352,7 @@ test_that("early stopping xgb.cv works", { nfold = 5, nrounds = 20, early_stopping_rounds = 3, - maximize = FALSE, + maximize = FALSE ) }, "Stopping. Best iteration" From afc870182d09d54bf05e54f82bbb9d36a9fbbe7e Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 20 Feb 2025 02:33:07 +0800 Subject: [PATCH 3/6] Doc. --- R-package/R/callbacks.R | 4 +++- R-package/man/xgb.cb.early.stop.Rd | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/R-package/R/callbacks.R b/R-package/R/callbacks.R index 49a5d99eb480..00c7963befda 100644 --- a/R-package/R/callbacks.R +++ b/R-package/R/callbacks.R @@ -613,9 +613,11 @@ xgb.cb.reset.parameters <- function(new_params) { #' `metric_name = 'dtest-auc'` or `metric_name = 'dtest_auc'`. #' All dash '-' characters in metric names are considered equivalent to '_'. #' @param verbose Whether to print the early stopping information. +#' #' @param keep_all_iter Whether to keep all of the boosting rounds that were produced #' in the resulting object. If passing `FALSE`, will only keep the boosting rounds -#' up to the detected best iteration, discarding the ones that come after. +#' up to the detected best iteration, discarding the ones that come after. This +#' parameter is not supported by the `xgb.cv` function and the `gblinear` booster yet. #' @return An `xgb.Callback` object, which can be passed to [xgb.train()] or [xgb.cv()]. #' @export xgb.cb.early.stop <- function( diff --git a/R-package/man/xgb.cb.early.stop.Rd b/R-package/man/xgb.cb.early.stop.Rd index 2833dbac1630..7b1050b98127 100644 --- a/R-package/man/xgb.cb.early.stop.Rd +++ b/R-package/man/xgb.cb.early.stop.Rd @@ -30,7 +30,8 @@ All dash '-' characters in metric names are considered equivalent to '_'.} \item{keep_all_iter}{Whether to keep all of the boosting rounds that were produced in the resulting object. If passing \code{FALSE}, will only keep the boosting rounds -up to the detected best iteration, discarding the ones that come after.} +up to the detected best iteration, discarding the ones that come after. This +parameter is not supported by the \code{xgb.cv} function and the \code{gblinear} booster yet.} } \value{ An \code{xgb.Callback} object, which can be passed to \code{\link[=xgb.train]{xgb.train()}} or \code{\link[=xgb.cv]{xgb.cv()}}. From 25af940701118c144f63f2edf5e41bda23fd6b6c Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 20 Feb 2025 02:40:22 +0800 Subject: [PATCH 4/6] Rename. rename. --- R-package/R/callbacks.R | 18 +++++++++--------- R-package/R/xgb.cv.R | 4 ++-- R-package/man/xgb.cb.early.stop.Rd | 10 +++++----- R-package/tests/testthat/test_basic.R | 4 ++-- python-package/xgboost/callback.py | 8 +++++--- 5 files changed, 23 insertions(+), 21 deletions(-) diff --git a/R-package/R/callbacks.R b/R-package/R/callbacks.R index 00c7963befda..300f49c73076 100644 --- a/R-package/R/callbacks.R +++ b/R-package/R/callbacks.R @@ -614,10 +614,10 @@ xgb.cb.reset.parameters <- function(new_params) { #' All dash '-' characters in metric names are considered equivalent to '_'. #' @param verbose Whether to print the early stopping information. #' -#' @param keep_all_iter Whether to keep all of the boosting rounds that were produced -#' in the resulting object. If passing `FALSE`, will only keep the boosting rounds -#' up to the detected best iteration, discarding the ones that come after. This -#' parameter is not supported by the `xgb.cv` function and the `gblinear` booster yet. +#' @param save_best Whether training should return the best model or the last model. If +#' set to `TRUE`, it will only keep the boosting rounds up to the detected best +#' iteration, discarding the ones that come after. This parameter is not supported by +#' the `xgb.cv` function and the `gblinear` booster yet. #' @return An `xgb.Callback` object, which can be passed to [xgb.train()] or [xgb.cv()]. #' @export xgb.cb.early.stop <- function( @@ -625,7 +625,7 @@ xgb.cb.early.stop <- function( maximize = FALSE, metric_name = NULL, verbose = TRUE, - keep_all_iter = TRUE + save_best = FALSE ) { if (!is.null(metric_name)) { stopifnot(is.character(metric_name)) @@ -641,7 +641,7 @@ xgb.cb.early.stop <- function( maximize = maximize, metric_name = metric_name, verbose = verbose, - keep_all_iter = keep_all_iter, + save_best = save_best, stopped_by_max_rounds = FALSE ) ), @@ -649,8 +649,8 @@ xgb.cb.early.stop <- function( if (inherits(model, "xgb.Booster") && !length(evals)) { stop("For early stopping, 'evals' must have at least one element") } - if (!inherits(model, "xgb.Booster") && keep_all_iter) { - stop("`keep_all_iter` must be set to FALSE for cv.") + if (!inherits(model, "xgb.Booster") && save_best) { + stop("'save_best' must be set to FALSE for cv.") } env$begin_iteration <- begin_iteration return(NULL) @@ -736,7 +736,7 @@ xgb.cb.early.stop <- function( return(FALSE) }, f_after_training = function(env, model, data, evals, iteration, final_feval, prev_cb_res) { - if (inherits(model, "xgb.Booster") && !env$keep_all_iter && env$best_iteration < iteration) { + if (inherits(model, "xgb.Booster") && env$save_best && env$best_iteration < iteration) { # Note: it loses the attributes after being sliced, # so they have to be re-assigned afterwards. prev_attr <- xgb.attributes(model) diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R index 94c022114f25..48a0073386c5 100644 --- a/R-package/R/xgb.cv.R +++ b/R-package/R/xgb.cv.R @@ -118,7 +118,7 @@ xgb.cv <- function(params = xgb.params(), data, nrounds, nfold, stop("'data' is an invalid 'xgb.DMatrix' object. Must be constructed again.") } if (inherits(data, "xgb.QuantileDMatrix")) { - stop("`xgb.QuantileDMatrix` is not yet supported for the cv function.") + stop("'xgb.QuantileDMatrix' is not yet supported for the cv function.") } params <- check.booster.params(params) @@ -176,7 +176,7 @@ xgb.cv <- function(params = xgb.params(), data, nrounds, nfold, early_stopping_rounds, maximize = maximize, verbose = verbose, - keep_all_iter = FALSE + save_best = FALSE ), as_first_elt = TRUE ) diff --git a/R-package/man/xgb.cb.early.stop.Rd b/R-package/man/xgb.cb.early.stop.Rd index 7b1050b98127..2a78e44169a6 100644 --- a/R-package/man/xgb.cb.early.stop.Rd +++ b/R-package/man/xgb.cb.early.stop.Rd @@ -9,7 +9,7 @@ xgb.cb.early.stop( maximize = FALSE, metric_name = NULL, verbose = TRUE, - keep_all_iter = TRUE + save_best = FALSE ) } \arguments{ @@ -28,10 +28,10 @@ All dash '-' characters in metric names are considered equivalent to '_'.} \item{verbose}{Whether to print the early stopping information.} -\item{keep_all_iter}{Whether to keep all of the boosting rounds that were produced -in the resulting object. If passing \code{FALSE}, will only keep the boosting rounds -up to the detected best iteration, discarding the ones that come after. This -parameter is not supported by the \code{xgb.cv} function and the \code{gblinear} booster yet.} +\item{save_best}{Whether training should return the best model or the last model. If +set to \code{TRUE}, it will only keep the boosting rounds up to the detected best +iteration, discarding the ones that come after. This parameter is not supported by +the \code{xgb.cv} function and the \code{gblinear} booster yet.} } \value{ An \code{xgb.Callback} object, which can be passed to \code{\link[=xgb.train]{xgb.train()}} or \code{\link[=xgb.cv]{xgb.cv()}}. diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R index 55083af7aac8..b92c7cbb923d 100644 --- a/R-package/tests/testthat/test_basic.R +++ b/R-package/tests/testthat/test_basic.R @@ -429,10 +429,10 @@ test_that("xgb.cv invalid inputs", { nthread = n_threads, ), callbacks = list( - xgb.cb.early.stop(stopping_rounds = 3) + xgb.cb.early.stop(stopping_rounds = 3, save_best = TRUE) ) ), - regexp = ".*keep_all_iter.*" + regexp = ".*save_best.*" ) }) diff --git a/python-package/xgboost/callback.py b/python-package/xgboost/callback.py index 907a54d7b6d7..67832fbc456f 100644 --- a/python-package/xgboost/callback.py +++ b/python-package/xgboost/callback.py @@ -325,9 +325,11 @@ class EarlyStopping(TrainingCallback): maximize : Whether to maximize evaluation metric. None means auto (discouraged). save_best : - Whether training should return the best model or the last model. This is only - supported with tree methods. Also, the `cv` function doesn't return a model, the - parameter is not applicable. + Whether training should return the best model or the last model. If set to + `True`, it will only keep the boosting rounds up to the detected best iteration, + discarding the ones that come after. This is only supported with tree + methods. Also, the `cv` function doesn't return a model, the parameter is not + applicable. min_delta : .. versionadded:: 1.5.0 From 4dd517b54c3eb9a0a2227a3417e8e3470fa50713 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 20 Feb 2025 02:45:14 +0800 Subject: [PATCH 5/6] Fix. --- R-package/R/callbacks.R | 2 +- R-package/R/xgb.cv.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R-package/R/callbacks.R b/R-package/R/callbacks.R index 300f49c73076..a62d9350cc71 100644 --- a/R-package/R/callbacks.R +++ b/R-package/R/callbacks.R @@ -650,7 +650,7 @@ xgb.cb.early.stop <- function( stop("For early stopping, 'evals' must have at least one element") } if (!inherits(model, "xgb.Booster") && save_best) { - stop("'save_best' must be set to FALSE for cv.") + stop("'save_best' must be set to FALSE when using early stopping in 'xgb.cv'.") } env$begin_iteration <- begin_iteration return(NULL) diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R index 48a0073386c5..f81fd51dfec8 100644 --- a/R-package/R/xgb.cv.R +++ b/R-package/R/xgb.cv.R @@ -118,7 +118,7 @@ xgb.cv <- function(params = xgb.params(), data, nrounds, nfold, stop("'data' is an invalid 'xgb.DMatrix' object. Must be constructed again.") } if (inherits(data, "xgb.QuantileDMatrix")) { - stop("'xgb.QuantileDMatrix' is not yet supported for the cv function.") + stop("'xgb.QuantileDMatrix' is not supported as input to 'xgb.cv'.") } params <- check.booster.params(params) From f9e8144d70700c1f2822817cd130c7db31bfd744 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 20 Feb 2025 02:47:22 +0800 Subject: [PATCH 6/6] Mention gblinear in py. --- python-package/xgboost/callback.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python-package/xgboost/callback.py b/python-package/xgboost/callback.py index 67832fbc456f..c5d4f35580d9 100644 --- a/python-package/xgboost/callback.py +++ b/python-package/xgboost/callback.py @@ -327,9 +327,9 @@ class EarlyStopping(TrainingCallback): save_best : Whether training should return the best model or the last model. If set to `True`, it will only keep the boosting rounds up to the detected best iteration, - discarding the ones that come after. This is only supported with tree - methods. Also, the `cv` function doesn't return a model, the parameter is not - applicable. + discarding the ones that come after. This is only supported with tree methods + (not `gblinear`). Also, the `cv` function doesn't return a model, the parameter + is not applicable. min_delta : .. versionadded:: 1.5.0