diff --git a/R-package/R/lgb.Dataset.R b/R-package/R/lgb.Dataset.R
index 640f7b38bbb5..5170f19f348e 100644
--- a/R-package/R/lgb.Dataset.R
+++ b/R-package/R/lgb.Dataset.R
@@ -169,12 +169,16 @@ Dataset <- R6::R6Class(
         } else {
 
           # Check if more categorical features were output over the feature space
-          if (max(private$categorical_feature) > length(private$colnames)) {
+          # The bound is the column count of the raw data, so data without colnames is
+          # validated too. Skip the check when there are no columns to count (a file
+          # path or NULL raw data): ncol() is NULL there and the comparison would be empty
+          data_has_columns <- !is.character(private$raw_data) && !is.null(ncol(private$raw_data))
+          if (data_has_columns && max(private$categorical_feature) > ncol(private$raw_data)) {
             stop(
               "lgb.self.get.handle: supplied a too large value in categorical_feature: "
               , max(private$categorical_feature)
               , " but only "
-              , length(private$colnames)
+              , ncol(private$raw_data)
               , " features"
             )
           }
diff --git a/R-package/tests/testthat/test_dataset.R b/R-package/tests/testthat/test_dataset.R
index 7e3778f44505..9e7afabd02e9 100644
--- a/R-package/tests/testthat/test_dataset.R
+++ b/R-package/tests/testthat/test_dataset.R
@@ -548,3 +548,18 @@ test_that("lgb.Dataset$get_feature_num_bin() works", {
   actual_num_bins <- sapply(1L:5L, ds$get_feature_num_bin)
   expect_identical(actual_num_bins, expected_num_bins)
 })
+
+test_that("lgb.Dataset can be constructed with categorical features and without colnames", {
+  # check that dataset can be constructed
+  raw_mat <- matrix(rep(c(0L, 1L), 50L), ncol = 1L)
+  ds <- lgb.Dataset(raw_mat, categorical_feature = 1L)$construct()
+  sparse_mat <- as(raw_mat, "dgCMatrix")
+  ds2 <- lgb.Dataset(sparse_mat, categorical_feature = 1L)$construct()
+  # check that the column names are NULL
+  expect_null(ds$.__enclos_env__$private$colnames)
+  expect_null(ds2$.__enclos_env__$private$colnames)
+  # check for error when index is greater than the number of columns
+  expect_error({
+    lgb.Dataset(raw_mat, categorical_feature = 2L)$construct()
+  }, regexp = "supplied a too large value in categorical_feature: 2 but only 1 features")
+})