From 6862821b9ab004f6cffc215725ddb0bb153950b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Boudry?= Date: Sat, 23 Nov 2024 21:41:15 +0100 Subject: [PATCH 1/8] Added lgb.plot.tree function. Added DiagrammeR as suggested in DESCRIPTION Added lgb.plot.tree in _pkgdown.yml Roxygenized. --- R-package/DESCRIPTION | 3 +- R-package/R/lgb.plot.tree.R | 184 +++++++++++++++++++++++++++++++++ R-package/man/lgb.plot.tree.Rd | 55 ++++++++++ R-package/pkgdown/_pkgdown.yml | 1 + 4 files changed, 242 insertions(+), 1 deletion(-) create mode 100644 R-package/R/lgb.plot.tree.R create mode 100644 R-package/man/lgb.plot.tree.Rd diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index c9344ceebab7..6660a6e6ab49 100755 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -49,7 +49,8 @@ Suggests: markdown, processx, RhpcBLASctl, - testthat + testthat, + DiagrammeR Depends: R (>= 3.5) Imports: diff --git a/R-package/R/lgb.plot.tree.R b/R-package/R/lgb.plot.tree.R new file mode 100644 index 000000000000..cecef3e99d78 --- /dev/null +++ b/R-package/R/lgb.plot.tree.R @@ -0,0 +1,184 @@ +#' @name lgb.plot.tree +#' @title Plot a single LightGBM tree using DiagrammeR. +#' @description The \code{lgb.plot.tree} function creates a DiagrammeR plot of a single LightGBM tree. +#' @param model a \code{lgb.Booster} object. +#' @param tree an integer specifying the tree to plot. +#' @param rules a list of rules to replace the split values with feature levels. +#' +#' @return +#' The \code{lgb.plot.tree} function creates a DiagrammeR plot. +#' +#' @details +#' The \code{lgb.plot.tree} function creates a DiagrammeR plot of a single LightGBM tree. The tree is extracted from the model and displayed as a directed graph. The nodes are labelled with the feature, split value, gain, cover and value. The edges are labelled with the decision type and split value. The nodes are styled with a rectangle shape and filled with a beige colour. Leaf nodes are styled with an oval shape and filled with a khaki colour. The graph is rendered using the dot layout with a left-to-right rank direction. The nodes are coloured dim gray with a filled style and a Helvetica font. The edges are coloured dim gray with a solid style, a 1.5 arrow size, a vee arrowhead and a Helvetica font. +#' +#' @examples +#' \donttest{ +#' # EXAMPLE: use the LightGBM example dataset to build a model with a single tree +#' data(agaricus.train, package = "lightgbm") +#' train <- agaricus.train +#' dtrain <- lgb.Dataset(train$data, label = train$label) +#' data(agaricus.test, package = "lightgbm") +#' test <- agaricus.test +#' dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label) +#' # define model parameters and build a single tree +#' params <- list( +#' objective = "regression", +#' metric = "l2", +#' min_data = 1L, +#' learning_rate = 1.0 +#' ) +#' valids <- list(test = dtest) +#' model <- lgb.train( +#' params = params, +#' data = dtrain, +#' nrounds = 1L, +#' valids = valids, +#' early_stopping_rounds = 1L +#' ) +#' # plot the tree and compare to the tree table +#' # trees start from 0 in lgb.model.dt.tree +#' tree_table <- lgb.model.dt.tree(model) +#' lgb.plot.tree(model, 0) +#' } +#' +#' @export + +# function to plot a single LightGBM tree using DiagrammeR +lgb.plot.tree <- function(model = NULL, tree = NULL, rules = NULL) { + # check model is lgb.Booster + if (!inherits(model, "lgb.Booster")) { + stop("model: Has to be an object of class lgb.Booster") + } + # check DiagrammeR is available + if (!requireNamespace("DiagrammeR", quietly = TRUE)) { + stop("DiagrammeR package is required for lgb.plot.tree", + call. = FALSE + ) + } + # tree must be numeric + if (!inherits(tree, "numeric")) { + stop("tree: Has to be an integer numeric") + } + # tree must be integer + if (tree %% 1 != 0) { + stop("tree: Has to be an integer numeric") + } + # extract data.table model structure + dt <- lgb.model.dt.tree(model) + # check that tree is less than or equal to the maximum tree index in the model + if (tree > max(dt$tree_index)) { + stop("tree: has to be less than the number of trees in the model") + } + # filter dt to just the rows for the selected tree + dt <- dt[tree_index == tree, ] + # change the column names to shorter more diagram friendly versions + data.table::setnames(dt, old = c("tree_index", "split_feature", "threshold", "split_gain"), new = c("Tree", "Feature", "Split", "Gain")) + dt[, Value := 0.0] + dt[, Value := leaf_value] + dt[is.na(Value), Value := internal_value] + dt[is.na(Gain), Gain := leaf_value] + dt[is.na(Feature), Feature := "Leaf"] + dt[, Cover := internal_count][Feature == "Leaf", Cover := leaf_count] + dt[, c("leaf_count", "internal_count", "leaf_value", "internal_value") := NULL] + dt[, Node := split_index] + max_node <- max(dt[["Node"]], na.rm = TRUE) + dt[is.na(Node), Node := max_node + leaf_index + 1] + dt[, ID := paste(Tree, Node, sep = "-")] + dt[, c("depth", "leaf_index") := NULL] + dt[, parent := node_parent][is.na(parent), parent := leaf_parent] + dt[, c("node_parent", "leaf_parent", "split_index") := NULL] + dt[, Yes := dt$ID[match(dt$Node, dt$parent)]] + dt <- dt[nrow(dt):1, ] + dt[, No := dt$ID[match(dt$Node, dt$parent)]] + # which way do the NA's go (this path will get a thicker arrow) + # for categorical features, NA gets put into the zero group + dt[default_left == TRUE, Missing := Yes] + dt[default_left == FALSE, Missing := No] + zero_present <- function(x) { + sapply(strsplit(as.character(x), "||", fixed = TRUE), function(el) { + any(el == "0") + }) + } + dt[zero_present(Split), Missing := Yes] + # dt[, c('parent', 'default_left') := NULL] + # data.table::setcolorder(dt, c('Tree','Node','ID','Feature','decision_type','Split','Yes','No','Missing','Gain','Cover','Value')) + # create the label text + dt[, label := paste0( + Feature, + "\nCover: ", Cover, + ifelse(Feature == "Leaf", "", "\nGain: "), ifelse(Feature == "Leaf", "", round(Gain, 4)), + "\nValue: ", round(Value, 4) + )] + # style the nodes - same format as xgboost + dt[Node == 0, label := paste0("Tree ", Tree, "\n", label)] + dt[, shape := "rectangle"][Feature == "Leaf", shape := "oval"] + dt[, filledcolor := "Beige"][Feature == "Leaf", filledcolor := "Khaki"] + # in order to draw the first tree on top: + dt <- dt[order(-Tree)] + nodes <- DiagrammeR::create_node_df( + n = nrow(dt), + ID = dt$ID, + label = dt$label, + fillcolor = dt$filledcolor, + shape = dt$shape, + data = dt$Feature, + fontcolor = "black" + ) + # round the edge labels to 4 s.f. if they are numeric + # as otherwise get too many decimal places and the diagram looks bad + # would rather not use suppressWarnings + numeric_idx <- suppressWarnings(!is.na(as.numeric(dt[["Split"]]))) + dt[numeric_idx, Split := round(as.numeric(Split), 4)] + # replace indices with feature levels if rules supplied + levels.to.names <- function(x, feature_name, rules) { + lvls <- sort(rules[[feature_name]]) + result <- strsplit(x, "||", fixed = TRUE) + result <- lapply(result, as.numeric) + levels_to_names <- function(x) { + names(lvls)[as.numeric(x)] + } + result <- lapply(result, levels_to_names) + result <- lapply(result, paste, collapse = "\n") + result <- as.character(result) + } + if (!is.null(rules)) { + for (f in names(rules)) { + dt[Feature == f & decision_type == "==", Split := levels.to.names(Split, f, rules)] + } + } + # replace long split names with a message + dt[nchar(Split) > 500, Split := "Split too long to render"] + # create the edge labels + edges <- DiagrammeR::create_edge_df( + from = match(dt[Feature != "Leaf", c(ID)] %>% rep(2), dt$ID), + to = match(dt[Feature != "Leaf", c(Yes, No)], dt$ID), + label = dt[Feature != "Leaf", paste(decision_type, Split)] %>% + c(rep("", nrow(dt[Feature != "Leaf"]))), + style = dt[Feature != "Leaf", ifelse(Missing == Yes, "bold", "solid")] %>% + c(dt[Feature != "Leaf", ifelse(Missing == No, "bold", "solid")]), + rel = "leading_to" + ) + # create the graph + graph <- DiagrammeR::create_graph( + nodes_df = nodes, + edges_df = edges, + attr_theme = NULL + ) %>% + DiagrammeR::add_global_graph_attrs( + attr_type = "graph", + attr = c("layout", "rankdir"), + value = c("dot", "LR") + ) %>% + DiagrammeR::add_global_graph_attrs( + attr_type = "node", + attr = c("color", "style", "fontname"), + value = c("DimGray", "filled", "Helvetica") + ) %>% + DiagrammeR::add_global_graph_attrs( + attr_type = "edge", + attr = c("color", "arrowsize", "arrowhead", "fontname"), + value = c("DimGray", "1.5", "vee", "Helvetica") + ) + # render the graph + DiagrammeR::render_graph(graph) +} diff --git a/R-package/man/lgb.plot.tree.Rd b/R-package/man/lgb.plot.tree.Rd new file mode 100644 index 000000000000..e48cfe420265 --- /dev/null +++ b/R-package/man/lgb.plot.tree.Rd @@ -0,0 +1,55 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lgb.plot.tree.R +\name{lgb.plot.tree} +\alias{lgb.plot.tree} +\title{Plot a single LightGBM tree using DiagrammeR.} +\usage{ +lgb.plot.tree(model = NULL, tree = NULL, rules = NULL) +} +\arguments{ +\item{model}{a \code{lgb.Booster} object.} + +\item{tree}{an integer specifying the tree to plot.} + +\item{rules}{a list of rules to replace the split values with feature levels.} +} +\value{ +The \code{lgb.plot.tree} function creates a DiagrammeR plot. +} +\description{ +The \code{lgb.plot.tree} function creates a DiagrammeR plot of a single LightGBM tree. +} +\details{ +The \code{lgb.plot.tree} function creates a DiagrammeR plot of a single LightGBM tree. The tree is extracted from the model and displayed as a directed graph. The nodes are labelled with the feature, split value, gain, cover and value. The edges are labelled with the decision type and split value. The nodes are styled with a rectangle shape and filled with a beige colour. Leaf nodes are styled with an oval shape and filled with a khaki colour. The graph is rendered using the dot layout with a left-to-right rank direction. The nodes are coloured dim gray with a filled style and a Helvetica font. The edges are coloured dim gray with a solid style, a 1.5 arrow size, a vee arrowhead and a Helvetica font. +} +\examples{ +\donttest{ +# EXAMPLE: use the LightGBM example dataset to build a model with a single tree +data(agaricus.train, package = "lightgbm") +train <- agaricus.train +dtrain <- lgb.Dataset(train$data, label = train$label) +data(agaricus.test, package = "lightgbm") +test <- agaricus.test +dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label) +# define model parameters and build a single tree +params <- list( + objective = "regression", + metric = "l2", + min_data = 1L, + learning_rate = 1.0 +) +valids <- list(test = dtest) +model <- lgb.train( + params = params, + data = dtrain, + nrounds = 1L, + valids = valids, + early_stopping_rounds = 1L +) +# plot the tree and compare to the tree table +# trees start from 0 in lgb.model.dt.tree +tree_table <- lgb.model.dt.tree(model) +lgb.plot.tree(model, 0) +} + +} diff --git a/R-package/pkgdown/_pkgdown.yml b/R-package/pkgdown/_pkgdown.yml index c2d6718a2926..e2a6d7e6c7ac 100644 --- a/R-package/pkgdown/_pkgdown.yml +++ b/R-package/pkgdown/_pkgdown.yml @@ -97,6 +97,7 @@ reference: - '`lgb.interprete`' - '`lgb.plot.importance`' - '`lgb.plot.interpretation`' + - '`lgb.plot.tree`' - '`print.lgb.Booster`' - '`summary.lgb.Booster`' - title: Multithreading Control From 0a7ea0e433c067b633913694d30b4917336f9db9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Boudry?= Date: Sat, 23 Nov 2024 23:00:51 +0100 Subject: [PATCH 2/8] Added tests. --- R-package/NAMESPACE | 1 + R-package/tests/testthat/test_lgb.plot.tree.R | 59 +++++++++++++++++++ 2 files changed, 60 insertions(+) create mode 100644 R-package/tests/testthat/test_lgb.plot.tree.R diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE index 49ef2b5cb8fc..4f5c308ac3df 100644 --- a/R-package/NAMESPACE +++ b/R-package/NAMESPACE @@ -29,6 +29,7 @@ export(lgb.make_serializable) export(lgb.model.dt.tree) export(lgb.plot.importance) export(lgb.plot.interpretation) +export(lgb.plot.tree) export(lgb.restore_handle) export(lgb.save) export(lgb.slice.Dataset) diff --git a/R-package/tests/testthat/test_lgb.plot.tree.R b/R-package/tests/testthat/test_lgb.plot.tree.R new file mode 100644 index 000000000000..64b462e186ec --- /dev/null +++ b/R-package/tests/testthat/test_lgb.plot.tree.R @@ -0,0 +1,59 @@ +test_that("lgb.plot.tree works as expected"){ + data(agaricus.train, package = "lightgbm") + train <- agaricus.train + dtrain <- lgb.Dataset(train$data, label = train$label) + data(agaricus.test, package = "lightgbm") + test <- agaricus.test + dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label) + # define model parameters and build a single tree + params <- list( + objective = "regression" + , metric = "l2" + , min_data = 1L + , learning_rate = 1.0 + ) + valids <- list(test = dtest) + model <- lgb.train( + params = params + , data = dtrain + , nrounds = 1L + , valids = valids + , early_stopping_rounds = 1L + ) + # plot the tree and compare to the tree table + # trees start from 0 in lgb.model.dt.tree + tree_table <- lgb.model.dt.tree(model) + expect_true({ + lgb.plot.tree(model, 0)TRUE + }) +} + +test_that("lgb.plot.tree fails when a non existing tree is selected"){ + data(agaricus.train, package = "lightgbm") + train <- agaricus.train + dtrain <- lgb.Dataset(train$data, label = train$label) + data(agaricus.test, package = "lightgbm") + test <- agaricus.test + dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label) + # define model parameters and build a single tree + params <- list( + objective = "regression" + , metric = "l2" + , min_data = 1L + , learning_rate = 1.0 + ) + valids <- list(test = dtest) + model <- lgb.train( + params = params + , data = dtrain + , nrounds = 1L + , valids = valids + , early_stopping_rounds = 1L + ) + # plot the tree and compare to the tree table + # trees start from 0 in lgb.model.dt.tree + tree_table <- lgb.model.dt.tree(model) + expect_error({ + lgb.plot.tree(model, 999)TRUE + }) +} From 757dc847288886287a76eb7f2d076e01d3104ac7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Boudry?= Date: Tue, 24 Dec 2024 19:58:02 +0100 Subject: [PATCH 3/8] Added review suggestions. DiagrammeR in CI. Error messages. Default parameters. Changed tests. --- .ci/test-r-package-windows.ps1 | 2 +- R-package/DESCRIPTION | 2 +- R-package/R/lgb.plot.tree.R | 199 +++++++++--------- R-package/tests/testthat/test_lgb.plot.tree.R | 44 ++-- 4 files changed, 119 insertions(+), 128 deletions(-) diff --git a/.ci/test-r-package-windows.ps1 b/.ci/test-r-package-windows.ps1 index a3f524b60be7..857b2789cbbc 100644 --- a/.ci/test-r-package-windows.ps1 +++ b/.ci/test-r-package-windows.ps1 @@ -177,7 +177,7 @@ Write-Output "Done installing CMake" Write-Output "Installing dependencies" $packages = -join @( - "c('data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'processx', 'R6', 'RhpcBLASctl', 'testthat'), ", + "c('data.table', 'DiagrammeR', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'processx', 'R6', 'RhpcBLASctl', 'testthat'), ", "dependencies = c('Imports', 'Depends', 'LinkingTo')" ) $params = -join @( diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 6660a6e6ab49..096265331d59 100755 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -45,12 +45,12 @@ NeedsCompilation: yes Biarch: true VignetteBuilder: knitr Suggests: + DiagrammeR, knitr, markdown, processx, RhpcBLASctl, testthat, - DiagrammeR Depends: R (>= 3.5) Imports: diff --git a/R-package/R/lgb.plot.tree.R b/R-package/R/lgb.plot.tree.R index cecef3e99d78..c7ee06c5abbd 100644 --- a/R-package/R/lgb.plot.tree.R +++ b/R-package/R/lgb.plot.tree.R @@ -1,15 +1,15 @@ #' @name lgb.plot.tree -#' @title Plot a single LightGBM tree using DiagrammeR. +#' @title Plot a single LightGBM tree. #' @description The \code{lgb.plot.tree} function creates a DiagrammeR plot of a single LightGBM tree. #' @param model a \code{lgb.Booster} object. -#' @param tree an integer specifying the tree to plot. +#' @param tree an integer specifying the tree to plot. This is 1-based, so e.g. a value of '7' means 'the 7th tree' (tree_index=6 in LightGBM's underlying representation). #' @param rules a list of rules to replace the split values with feature levels. #' #' @return #' The \code{lgb.plot.tree} function creates a DiagrammeR plot. #' #' @details -#' The \code{lgb.plot.tree} function creates a DiagrammeR plot of a single LightGBM tree. The tree is extracted from the model and displayed as a directed graph. The nodes are labelled with the feature, split value, gain, cover and value. The edges are labelled with the decision type and split value. The nodes are styled with a rectangle shape and filled with a beige colour. Leaf nodes are styled with an oval shape and filled with a khaki colour. The graph is rendered using the dot layout with a left-to-right rank direction. The nodes are coloured dim gray with a filled style and a Helvetica font. The edges are coloured dim gray with a solid style, a 1.5 arrow size, a vee arrowhead and a Helvetica font. +#' The \code{lgb.plot.tree} function creates a DiagrammeR plot of a single LightGBM tree. The tree is extracted from the model and displayed as a directed graph. The nodes are labelled with the feature, split value, gain, cover and value. The edges are labelled with the decision type and split value. #' #' @examples #' \donttest{ @@ -23,9 +23,7 @@ #' # define model parameters and build a single tree #' params <- list( #' objective = "regression", -#' metric = "l2", #' min_data = 1L, -#' learning_rate = 1.0 #' ) #' valids <- list(test = dtest) #' model <- lgb.train( @@ -43,142 +41,151 @@ #' #' @export -# function to plot a single LightGBM tree using DiagrammeR lgb.plot.tree <- function(model = NULL, tree = NULL, rules = NULL) { # check model is lgb.Booster - if (!inherits(model, "lgb.Booster")) { - stop("model: Has to be an object of class lgb.Booster") + if (!.is_Booster(x = model)) { + stop("lgb.plot.tree: model should be an ", sQuote("lgb.Booster")) } - # check DiagrammeR is available if (!requireNamespace("DiagrammeR", quietly = TRUE)) { - stop("DiagrammeR package is required for lgb.plot.tree", + stop("lgb.plot.tree: DiagrammeR package is required", call. = FALSE ) } # tree must be numeric if (!inherits(tree, "numeric")) { - stop("tree: Has to be an integer numeric") + stop("lgb.plot.tree: Has to be an integer numeric") } # tree must be integer if (tree %% 1 != 0) { - stop("tree: Has to be an integer numeric") + stop("lgb.plot.tree: Has to be an integer numeric") } # extract data.table model structure - dt <- lgb.model.dt.tree(model) + modelDT <- lgb.model.dt.tree(model) # check that tree is less than or equal to the maximum tree index in the model - if (tree > max(dt$tree_index)) { - stop("tree: has to be less than the number of trees in the model") + if (tree > max(modelDT$tree_index)) { + stop("lgb.plot.tree: Value of 'tree' should be between 1 and the total number of trees in the model (", max(modelDT$tree_index, "). Got: ," tree, ".") } - # filter dt to just the rows for the selected tree - dt <- dt[tree_index == tree, ] + # filter modelDT to just the rows for the selected tree + modelDT <- modelDT[tree_index == tree, ] # change the column names to shorter more diagram friendly versions - data.table::setnames(dt, old = c("tree_index", "split_feature", "threshold", "split_gain"), new = c("Tree", "Feature", "Split", "Gain")) - dt[, Value := 0.0] - dt[, Value := leaf_value] - dt[is.na(Value), Value := internal_value] - dt[is.na(Gain), Gain := leaf_value] - dt[is.na(Feature), Feature := "Leaf"] - dt[, Cover := internal_count][Feature == "Leaf", Cover := leaf_count] - dt[, c("leaf_count", "internal_count", "leaf_value", "internal_value") := NULL] - dt[, Node := split_index] - max_node <- max(dt[["Node"]], na.rm = TRUE) - dt[is.na(Node), Node := max_node + leaf_index + 1] - dt[, ID := paste(Tree, Node, sep = "-")] - dt[, c("depth", "leaf_index") := NULL] - dt[, parent := node_parent][is.na(parent), parent := leaf_parent] - dt[, c("node_parent", "leaf_parent", "split_index") := NULL] - dt[, Yes := dt$ID[match(dt$Node, dt$parent)]] - dt <- dt[nrow(dt):1, ] - dt[, No := dt$ID[match(dt$Node, dt$parent)]] + data.table::setnames(modelDT, old = c("tree_index", "split_feature", "threshold", "split_gain"), new = c("Tree", "Feature", "Split", "Gain")) + modelDT[, Value := 0.0] + modelDT[, Value := leaf_value] + modelDT[is.na(Value), Value := internal_value] + modelDT[is.na(Gain), Gain := leaf_value] + modelDT[is.na(Feature), Feature := "Leaf"] + modelDT[, Cover := internal_count][Feature == "Leaf", Cover := leaf_count] + modelDT[, c("leaf_count", "internal_count", "leaf_value", "internal_value") := NULL] + modelDT[, Node := split_index] + max_node <- max(modelDT[["Node"]], na.rm = TRUE) + modelDT[is.na(Node), Node := max_node + leaf_index + 1] + modelDT[, ID := paste(Tree, Node, sep = "-")] + modelDT[, c("depth", "leaf_index") := NULL] + modelDT[, parent := node_parent][is.na(parent), parent := leaf_parent] + modelDT[, c("node_parent", "leaf_parent", "split_index") := NULL] + modelDT[, Yes := modelDT$ID[match(modelDT$Node, modelDT$parent)]] + modelDT <- modelDT[nrow(modelDT):1, ] + modelDT[, No := modelDT$ID[match(modelDT$Node, modelDT$parent)]] # which way do the NA's go (this path will get a thicker arrow) # for categorical features, NA gets put into the zero group - dt[default_left == TRUE, Missing := Yes] - dt[default_left == FALSE, Missing := No] - zero_present <- function(x) { - sapply(strsplit(as.character(x), "||", fixed = TRUE), function(el) { - any(el == "0") - }) - } - dt[zero_present(Split), Missing := Yes] - # dt[, c('parent', 'default_left') := NULL] - # data.table::setcolorder(dt, c('Tree','Node','ID','Feature','decision_type','Split','Yes','No','Missing','Gain','Cover','Value')) + modelDT[default_left == TRUE, Missing := Yes] + modelDT[default_left == FALSE, Missing := No] + modelDT[.zero_present(Split), Missing := Yes] + # modelDT[, c('parent', 'default_left') := NULL] + # data.table::setcolorder(modelDT, c('Tree','Node','ID','Feature','decision_type','Split','Yes','No','Missing','Gain','Cover','Value')) # create the label text - dt[, label := paste0( + modelDT[, label := paste0( Feature, "\nCover: ", Cover, ifelse(Feature == "Leaf", "", "\nGain: "), ifelse(Feature == "Leaf", "", round(Gain, 4)), "\nValue: ", round(Value, 4) )] # style the nodes - same format as xgboost - dt[Node == 0, label := paste0("Tree ", Tree, "\n", label)] - dt[, shape := "rectangle"][Feature == "Leaf", shape := "oval"] - dt[, filledcolor := "Beige"][Feature == "Leaf", filledcolor := "Khaki"] + modelDT[Node == 0, label := paste0("Tree ", Tree, "\n", label)] + modelDT[, shape := "rectangle"][Feature == "Leaf", shape := "oval"] + modelDT[, filledcolor := "Beige"][Feature == "Leaf", filledcolor := "Khaki"] # in order to draw the first tree on top: - dt <- dt[order(-Tree)] + modelDT <- modelDT[order(-Tree)] nodes <- DiagrammeR::create_node_df( - n = nrow(dt), - ID = dt$ID, - label = dt$label, - fillcolor = dt$filledcolor, - shape = dt$shape, - data = dt$Feature, - fontcolor = "black" + n = nrow(modelDT) + , ID = modelDT$ID + , label = modelDT$label + , fillcolor = modelDT$filledcolor + , shape = modelDT$shape + , data = modelDT$Feature + , fontcolor = "black" ) # round the edge labels to 4 s.f. if they are numeric # as otherwise get too many decimal places and the diagram looks bad # would rather not use suppressWarnings - numeric_idx <- suppressWarnings(!is.na(as.numeric(dt[["Split"]]))) - dt[numeric_idx, Split := round(as.numeric(Split), 4)] + numeric_idx <- suppressWarnings(!is.na(as.numeric(modelDT[["Split"]]))) + modelDT[numeric_idx, Split := round(as.numeric(Split), 4)] # replace indices with feature levels if rules supplied - levels.to.names <- function(x, feature_name, rules) { - lvls <- sort(rules[[feature_name]]) - result <- strsplit(x, "||", fixed = TRUE) - result <- lapply(result, as.numeric) - levels_to_names <- function(x) { - names(lvls)[as.numeric(x)] - } - result <- lapply(result, levels_to_names) - result <- lapply(result, paste, collapse = "\n") - result <- as.character(result) - } + if (!is.null(rules)) { for (f in names(rules)) { - dt[Feature == f & decision_type == "==", Split := levels.to.names(Split, f, rules)] + modelDT[Feature == f & decision_type == "==", Split := .levels.to.names(Split, f, rules)] } } # replace long split names with a message - dt[nchar(Split) > 500, Split := "Split too long to render"] + modelDT[nchar(Split) > 500, Split := "Split too long to render"] # create the edge labels edges <- DiagrammeR::create_edge_df( - from = match(dt[Feature != "Leaf", c(ID)] %>% rep(2), dt$ID), - to = match(dt[Feature != "Leaf", c(Yes, No)], dt$ID), - label = dt[Feature != "Leaf", paste(decision_type, Split)] %>% - c(rep("", nrow(dt[Feature != "Leaf"]))), - style = dt[Feature != "Leaf", ifelse(Missing == Yes, "bold", "solid")] %>% - c(dt[Feature != "Leaf", ifelse(Missing == No, "bold", "solid")]), + from = match(modelDT[Feature != "Leaf", c(ID)] %>% rep(2), modelDT$ID), + to = match(modelDT[Feature != "Leaf", c(Yes, No)], modelDT$ID), + label = modelDT[Feature != "Leaf", paste(decision_type, Split)] %>% + c(rep("", nrow(modelDT[Feature != "Leaf"]))), + style = modelDT[Feature != "Leaf", ifelse(Missing == Yes, "bold", "solid")] %>% + c(modelDT[Feature != "Leaf", ifelse(Missing == No, "bold", "solid")]), rel = "leading_to" ) # create the graph graph <- DiagrammeR::create_graph( - nodes_df = nodes, - edges_df = edges, - attr_theme = NULL - ) %>% - DiagrammeR::add_global_graph_attrs( - attr_type = "graph", - attr = c("layout", "rankdir"), - value = c("dot", "LR") - ) %>% - DiagrammeR::add_global_graph_attrs( - attr_type = "node", - attr = c("color", "style", "fontname"), - value = c("DimGray", "filled", "Helvetica") - ) %>% - DiagrammeR::add_global_graph_attrs( - attr_type = "edge", - attr = c("color", "arrowsize", "arrowhead", "fontname"), - value = c("DimGray", "1.5", "vee", "Helvetica") + nodes_df = nodes + , edges_df = edges + , attr_theme = NULL + ) + graph <- DiagrammeR::add_global_graph_attrs( + graph = graph + , attr_type = "graph" + , attr = c("layout", "rankdir") + , value = c("dot", "LR") ) + graph <- DiagrammeR::add_global_graph_attrs( + graph = graph + , attr_type = "node" + , attr = c("color", "style", "fontname") + , value = c("DimGray", "filled", "Helvetica") + ) + graph <- DiagrammeR::add_global_graph_attrs( + graph = graph + , attr_type = "edge" + , attr = c("color", "arrowsize", "arrowhead", "fontname") + , value = c("DimGray", "1.5", "vee", "Helvetica") + ) # render the graph DiagrammeR::render_graph(graph) + return(invisible(NULL)) } + +.zero_present <- function(x) { + sapply(strsplit(as.character(x), "||", fixed = TRUE), function(el) { + any(el == "0") + }) + return(invisible(NULL)) +} + +.levels.to.names <- function(x, feature_name, rules) { + lvls <- sort(rules[[feature_name]]) + result <- strsplit(x, "||", fixed = TRUE) + result <- lapply(result, as.numeric) + result <- lapply(result, .levels_to_names) + result <- lapply(result, paste, collapse = "\n") + result <- as.character(result) + return(invisible(NULL)) +} + +.levels_to_names <- function(x) { + names(lvls)[as.numeric(x)] + return(invisible(NULL)) +} \ No newline at end of file diff --git a/R-package/tests/testthat/test_lgb.plot.tree.R b/R-package/tests/testthat/test_lgb.plot.tree.R index 64b462e186ec..c53cf3422090 100644 --- a/R-package/tests/testthat/test_lgb.plot.tree.R +++ b/R-package/tests/testthat/test_lgb.plot.tree.R @@ -2,58 +2,42 @@ test_that("lgb.plot.tree works as expected"){ data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) - data(agaricus.test, package = "lightgbm") - test <- agaricus.test - dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label) # define model parameters and build a single tree - params <- list( - objective = "regression" - , metric = "l2" - , min_data = 1L - , learning_rate = 1.0 - ) - valids <- list(test = dtest) model <- lgb.train( - params = params + params = list( + objective = "regression" + , num_threads = .LGB_MAX_THREADS + ) , data = dtrain , nrounds = 1L - , valids = valids - , early_stopping_rounds = 1L + , verbose = .LGB_VERBOSITY ) # plot the tree and compare to the tree table # trees start from 0 in lgb.model.dt.tree tree_table <- lgb.model.dt.tree(model) expect_true({ - lgb.plot.tree(model, 0)TRUE - }) + lgb.plot.tree(model, 0) + }, regexp = "lgb.plot.tree: Value of 'tree' should be between 1 and the total number of trees in the model") } test_that("lgb.plot.tree fails when a non existing tree is selected"){ data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) - data(agaricus.test, package = "lightgbm") - test <- agaricus.test - dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label) # define model parameters and build a single tree - params <- list( - objective = "regression" - , metric = "l2" - , min_data = 1L - , learning_rate = 1.0 - ) - valids <- list(test = dtest) model <- lgb.train( - params = params + params = list( + objective = "regression" + , num_threads = .LGB_MAX_THREADS + ) , data = dtrain , nrounds = 1L - , valids = valids - , early_stopping_rounds = 1L + , verbose = .LGB_VERBOSITY ) # plot the tree and compare to the tree table # trees start from 0 in lgb.model.dt.tree tree_table <- lgb.model.dt.tree(model) expect_error({ - lgb.plot.tree(model, 999)TRUE - }) + lgb.plot.tree(model, 999) + }, regexp = "lgb.plot.tree: Value of 'tree' should be between 1 and the total number of trees in the model") } From 55aba68c7fac9273fc9e125eac8faa0aef222c41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Boudry?= Date: Wed, 25 Dec 2024 17:34:42 +0100 Subject: [PATCH 4/8] Updated tests. (based on R-package/tests/testthat/test_lgb.model.dt.tree.R) Now tests regressions, binary, multiclass classification and ranks. --- R-package/tests/testthat/test_lgb.plot.tree.R | 137 +++++++++++++----- 1 file changed, 98 insertions(+), 39 deletions(-) diff --git a/R-package/tests/testthat/test_lgb.plot.tree.R b/R-package/tests/testthat/test_lgb.plot.tree.R index c53cf3422090..54440b9825dd 100644 --- a/R-package/tests/testthat/test_lgb.plot.tree.R +++ b/R-package/tests/testthat/test_lgb.plot.tree.R @@ -1,43 +1,102 @@ -test_that("lgb.plot.tree works as expected"){ - data(agaricus.train, package = "lightgbm") - train <- agaricus.train - dtrain <- lgb.Dataset(train$data, label = train$label) - # define model parameters and build a single tree - model <- lgb.train( - params = list( - objective = "regression" - , num_threads = .LGB_MAX_THREADS - ) - , data = dtrain - , nrounds = 1L - , verbose = .LGB_VERBOSITY +NROUNDS <- 10L +MAX_DEPTH <- 3L +N <- nrow(iris) +X <- data.matrix(iris[2L:4L]) +FEAT <- colnames(X) +NCLASS <- nlevels(iris[, 5L]) + +model_reg <- lgb.train( + params = list( + objective = "regression" + , num_threads = .LGB_MAX_THREADS + , max.depth = MAX_DEPTH ) - # plot the tree and compare to the tree table - # trees start from 0 in lgb.model.dt.tree - tree_table <- lgb.model.dt.tree(model) - expect_true({ - lgb.plot.tree(model, 0) - }, regexp = "lgb.plot.tree: Value of 'tree' should be between 1 and the total number of trees in the model") -} + , data = lgb.Dataset(X, label = iris[, 1L]) + , verbose = .LGB_VERBOSITY + , nrounds = NROUNDS +) + +model_binary <- lgb.train( + params = list( + objective = "binary" + , num_threads = .LGB_MAX_THREADS + , max.depth = MAX_DEPTH + ) + , data = lgb.Dataset(X, label = iris[, 5L] == "setosa") + , verbose = .LGB_VERBOSITY + , nrounds = NROUNDS +) + +model_multiclass <- lgb.train( + params = list( + objective = "multiclass" + , num_threads = .LGB_MAX_THREADS + , max.depth = MAX_DEPTH + , num_classes = NCLASS + ) + , data = lgb.Dataset(X, label = as.integer(iris[, 5L]) - 1L) + , verbose = .LGB_VERBOSITY + , nrounds = NROUNDS +) -test_that("lgb.plot.tree fails when a non existing tree is selected"){ - data(agaricus.train, package = "lightgbm") - train <- agaricus.train - dtrain <- lgb.Dataset(train$data, label = train$label) - # define model parameters and build a single tree - model <- lgb.train( - params = list( - objective = "regression" - , num_threads = .LGB_MAX_THREADS - ) - , data = dtrain - , nrounds = 1L - , verbose = .LGB_VERBOSITY +model_rank <- lgb.train( + params = list( + objective = "lambdarank" + , num_threads = .LGB_MAX_THREADS + , max.depth = MAX_DEPTH + , lambdarank_truncation_level = 3L ) - # plot the tree and compare to the tree table - # trees start from 0 in lgb.model.dt.tree - tree_table <- lgb.model.dt.tree(model) - expect_error({ - lgb.plot.tree(model, 999) - }, regexp = "lgb.plot.tree: Value of 'tree' should be between 1 and the total number of trees in the model") + , data = lgb.Dataset( + X + , label = as.integer(iris[, 1L] > 5.8) + , group = rep(10L, times = 15L) + ) + , verbose = .LGB_VERBOSITY + , nrounds = NROUNDS +) + +models <- list( + reg = model_reg + , bin = model_binary + , multi = model_multiclass + , rank = model_rank +) + +for (model_name in names(models)){ + model <- models[[model_name]] + expected_n_trees <- NROUNDS + if (model_name == "multi") { + expected_n_trees <- NROUNDS * NCLASS + } + df <- as.data.frame(lgb.model.dt.tree(model)) + df_list <- split(df, f = df$tree_index, drop = TRUE) + df_leaf <- df[!is.na(df$leaf_index), ] + df_internal <- df[is.na(df$leaf_index), ] + + test_that("lgb.plot.tree fails when a non existing tree is selected", { + expect_error({ + lgb.plot.tree(model, 0) + }, regexp = "lgb.plot.tree: Value of 'tree' should be between 1 and the total number of trees in the model") + }) + test_that("lgb.plot.tree fails when a non existing tree is selected", { + expect_error({ + lgb.plot.tree(model, 999) + }, regexp = "lgb.plot.tree: Value of 'tree' should be between 1 and the total number of trees in the model") + }) + test_that("lgb.plot.tree fails when a non numeric tree is selected", { + expect_error({ + lgb.plot.tree(model, "a") + }, regexp = "lgb.plot.tree: Has to be an integer numeric") + }) + test_that("lgb.plot.tree fails when a non integer tree is selected", { + expect_error({ + lgb.plot.tree(model, 1.5) + }, regexp = "lgb.plot.tree: Has to be an integer numeric") + }) + test_that("lgb.plot.tree fails when a non lgb.Booster model is passed", { + expect_error({ + lgb.plot.tree(1, 0) + }, regexp = "lgb.plot.tree: model should be an 'lgb.Booster'") + }) } + From 85ff97aa733001c06085c6057422d4b8bc582aa4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Boudry?= Date: Wed, 25 Dec 2024 18:57:34 +0100 Subject: [PATCH 5/8] Corrected error (missing comma) in the selected tree check (L66). Commented code. --- R-package/R/lgb.plot.tree.R | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/R-package/R/lgb.plot.tree.R b/R-package/R/lgb.plot.tree.R index c7ee06c5abbd..c020977fda5f 100644 --- a/R-package/R/lgb.plot.tree.R +++ b/R-package/R/lgb.plot.tree.R @@ -40,12 +40,12 @@ #' } #' #' @export - lgb.plot.tree <- function(model = NULL, tree = NULL, rules = NULL) { # check model is lgb.Booster if (!.is_Booster(x = model)) { stop("lgb.plot.tree: model should be an ", sQuote("lgb.Booster")) } + # check DiagrammeR is available if (!requireNamespace("DiagrammeR", quietly = TRUE)) { stop("lgb.plot.tree: DiagrammeR package is required", call. = FALSE @@ -63,26 +63,36 @@ lgb.plot.tree <- function(model = NULL, tree = NULL, rules = NULL) { modelDT <- lgb.model.dt.tree(model) # check that tree is less than or equal to the maximum tree index in the model if (tree > max(modelDT$tree_index)) { - stop("lgb.plot.tree: Value of 'tree' should be between 1 and the total number of trees in the model (", max(modelDT$tree_index, "). Got: ," tree, ".") + stop("lgb.plot.tree: Value of 'tree' should be between 1 and the total number of trees in the model (", max(modelDT$tree_index), "). Got: ", tree, ".") } # filter modelDT to just the rows for the selected tree modelDT <- modelDT[tree_index == tree, ] # change the column names to shorter more diagram friendly versions - data.table::setnames(modelDT, old = c("tree_index", "split_feature", "threshold", "split_gain"), new = c("Tree", "Feature", "Split", "Gain")) - modelDT[, Value := 0.0] + data.table::setnames(modelDT + , old = c("tree_index", "split_feature", "threshold", "split_gain") + , new = c("Tree", "Feature", "Split", "Gain")) + # assign leaf_value to the Value column in modelDT modelDT[, Value := leaf_value] + # assign new values if NA modelDT[is.na(Value), Value := internal_value] modelDT[is.na(Gain), Gain := leaf_value] modelDT[is.na(Feature), Feature := "Leaf"] + # assign internal_count to Cover, and if Feature is "Leaf", assign leaf_count to Cover modelDT[, Cover := internal_count][Feature == "Leaf", Cover := leaf_count] + # remove unnecessary columns modelDT[, c("leaf_count", "internal_count", "leaf_value", "internal_value") := NULL] + # assign split_index to Node modelDT[, Node := split_index] + # find the maximum value of Node, if Node is NA, assign max_node + leaf_index + 1 to Node max_node <- max(modelDT[["Node"]], na.rm = TRUE) modelDT[is.na(Node), Node := max_node + leaf_index + 1] + # adding ID column modelDT[, ID := paste(Tree, Node, sep = "-")] + # remove unnecessary columns modelDT[, c("depth", "leaf_index") := NULL] modelDT[, parent := node_parent][is.na(parent), parent := leaf_parent] modelDT[, c("node_parent", "leaf_parent", "split_index") := NULL] + # assign the IDs of the matching parent nodes to Yes and No modelDT[, Yes := modelDT$ID[match(modelDT$Node, modelDT$parent)]] modelDT <- modelDT[nrow(modelDT):1, ] modelDT[, No := modelDT$ID[match(modelDT$Node, modelDT$parent)]] @@ -91,14 +101,16 @@ lgb.plot.tree <- function(model = NULL, tree = NULL, rules = NULL) { modelDT[default_left == TRUE, Missing := Yes] modelDT[default_left == FALSE, Missing := No] modelDT[.zero_present(Split), Missing := Yes] - # modelDT[, c('parent', 'default_left') := NULL] - # data.table::setcolorder(modelDT, c('Tree','Node','ID','Feature','decision_type','Split','Yes','No','Missing','Gain','Cover','Value')) # create the label text modelDT[, label := paste0( - Feature, - "\nCover: ", Cover, - ifelse(Feature == "Leaf", "", "\nGain: "), ifelse(Feature == "Leaf", "", round(Gain, 4)), - "\nValue: ", round(Value, 4) + Feature + , "\nCover: " + , Cover + , ifelse(Feature == "Leaf", "", "\nGain: "), ifelse(Feature == "Leaf" + , "" + , round(Gain, 4)) + , "\nValue: " + , round(Value, 4) )] # style the nodes - same format as xgboost modelDT[Node == 0, label := paste0("Tree ", Tree, "\n", label)] From b4b648ab6522d305ce9e2c9f30c494c82b4e285f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Boudry?= Date: Wed, 25 Dec 2024 23:29:58 +0100 Subject: [PATCH 6/8] Corrected tests. Added a warning to functions and shorter stop message to make tests work. --- R-package/R/lgb.plot.tree.R | 5 +++-- R-package/tests/testthat/test_lgb.plot.tree.R | 15 ++++----------- 2 files changed, 7 insertions(+), 13 deletions(-) diff --git a/R-package/R/lgb.plot.tree.R b/R-package/R/lgb.plot.tree.R index c020977fda5f..5df73f2de17e 100644 --- a/R-package/R/lgb.plot.tree.R +++ b/R-package/R/lgb.plot.tree.R @@ -62,8 +62,9 @@ lgb.plot.tree <- function(model = NULL, tree = NULL, rules = NULL) { # extract data.table model structure modelDT <- lgb.model.dt.tree(model) # check that tree is less than or equal to the maximum tree index in the model - if (tree > max(modelDT$tree_index)) { - stop("lgb.plot.tree: Value of 'tree' should be between 1 and the total number of trees in the model (", max(modelDT$tree_index), "). Got: ", tree, ".") + if (tree > max(modelDT$tree_index) || tree < 1) { + warning("lgb.plot.tree: Value of 'tree' should be between 1 and the total number of trees in the model (", max(modelDT$tree_index), "). Got: ", tree, ".") + stop("lgb.plot.tree: Invalid tree number") } # filter modelDT to just the rows for the selected tree modelDT <- modelDT[tree_index == tree, ] diff --git a/R-package/tests/testthat/test_lgb.plot.tree.R b/R-package/tests/testthat/test_lgb.plot.tree.R index 54440b9825dd..857b61030544 100644 --- a/R-package/tests/testthat/test_lgb.plot.tree.R +++ b/R-package/tests/testthat/test_lgb.plot.tree.R @@ -64,24 +64,17 @@ models <- list( for (model_name in names(models)){ model <- models[[model_name]] - expected_n_trees <- NROUNDS - if (model_name == "multi") { - expected_n_trees <- NROUNDS * NCLASS - } - df <- as.data.frame(lgb.model.dt.tree(model)) - df_list <- split(df, f = df$tree_index, drop = TRUE) - df_leaf <- df[!is.na(df$leaf_index), ] - df_internal <- df[is.na(df$leaf_index), ] + modelDT <- lgb.model.dt.tree(model) test_that("lgb.plot.tree fails when a non existing tree is selected", { expect_error({ lgb.plot.tree(model, 0) - }, regexp = "lgb.plot.tree: Value of 'tree' should be between 1 and the total number of trees in the model") + }, regexp = paste0("lgb.plot.tree: Invalid tree number")) }) test_that("lgb.plot.tree fails when a non existing tree is selected", { expect_error({ lgb.plot.tree(model, 999) - }, regexp = "lgb.plot.tree: Value of 'tree' should be between 1 and the total number of trees in the model") + }, regexp = paste0("lgb.plot.tree: Invalid tree number")) }) test_that("lgb.plot.tree fails when a non numeric tree is selected", { expect_error({ @@ -96,7 +89,7 @@ for (model_name in names(models)){ test_that("lgb.plot.tree fails when a non lgb.Booster model is passed", { expect_error({ lgb.plot.tree(1, 0) - }, regexp = "lgb.plot.tree: model should be an 'lgb.Booster'") + }, regexp = paste0("lgb.plot.tree: model should be an ", sQuote("lgb.Booster"))) }) } From ed6244119239ef539d32829b9da099c1c7695911 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Boudry?= Date: Mon, 30 Dec 2024 10:47:56 +0100 Subject: [PATCH 7/8] Added DiagrammeR in CI, github and Azure workflows as well as in the README. --- .ci/test-r-package.sh | 2 +- .github/workflows/r_package.yml | 4 ++-- .github/workflows/static_analysis.yml | 2 +- .vsts-ci.yml | 2 +- R-package/README.md | 4 ++-- 5 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.ci/test-r-package.sh b/.ci/test-r-package.sh index 2e414ec0d282..55d37e6dff03 100755 --- a/.ci/test-r-package.sh +++ b/.ci/test-r-package.sh @@ -114,7 +114,7 @@ Rscript --vanilla -e "install.packages('https://cran.r-project.org/src/contrib/A # Manually install Depends and Imports libraries + 'knitr', 'markdown', 'RhpcBLASctl', 'testthat' # to avoid a CI-time dependency on devtools (for devtools::install_deps()) -packages="c('data.table', 'jsonlite', 'knitr', 'markdown', 'R6', 'RhpcBLASctl', 'testthat')" +packages="c('data.table', 'DiagrammeR', 'jsonlite', 'knitr', 'markdown', 'R6', 'RhpcBLASctl', 'testthat')" compile_from_source="both" if [[ $OS_NAME == "macos" ]]; then packages+=", type = 'binary'" diff --git a/.github/workflows/r_package.yml b/.github/workflows/r_package.yml index 66e05a18ba1f..c8506a414215 100644 --- a/.github/workflows/r_package.yml +++ b/.github/workflows/r_package.yml @@ -230,7 +230,7 @@ jobs: - name: Install packages shell: bash run: | - RDscript${{ matrix.r_customization }} -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl', 'testthat'), repos = 'https://cran.rstudio.com', Ncpus = parallel::detectCores())" + RDscript${{ matrix.r_customization }} -e "install.packages(c('R6', 'data.table', 'DiagrammeR', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl', 'testthat'), repos = 'https://cran.rstudio.com', Ncpus = parallel::detectCores())" sh build-cran-package.sh --r-executable=RD${{ matrix.r_customization }} RD${{ matrix.r_customization }} CMD INSTALL lightgbm_*.tar.gz || exit 1 - name: Run tests with sanitizers @@ -295,7 +295,7 @@ jobs: - name: Install packages and run tests shell: bash run: | - Rscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl'), repos = 'https://cran.rstudio.com', Ncpus = parallel::detectCores())" + Rscript -e "install.packages(c('R6', 'data.table', 'DiagrammeR', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl'), repos = 'https://cran.rstudio.com', Ncpus = parallel::detectCores())" sh build-cran-package.sh # 'rchk' isn't run through 'R CMD check', use the approach documented at diff --git a/.github/workflows/static_analysis.yml b/.github/workflows/static_analysis.yml index 34e573e0eea6..872ef9dbac14 100644 --- a/.github/workflows/static_analysis.yml +++ b/.github/workflows/static_analysis.yml @@ -64,7 +64,7 @@ jobs: - name: Install packages shell: bash run: | - Rscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl', 'roxygen2', 'testthat'), repos = 'https://cran.rstudio.com', Ncpus = parallel::detectCores())" + Rscript -e "install.packages(c('R6', 'data.table', 'DiagrammeR', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl', 'roxygen2', 'testthat'), repos = 'https://cran.rstudio.com', Ncpus = parallel::detectCores())" sh build-cran-package.sh || exit 1 R CMD INSTALL --with-keep.source lightgbm_*.tar.gz || exit 1 - name: Test documentation diff --git a/.vsts-ci.yml b/.vsts-ci.yml index 40424840c82d..6f99e37189cf 100644 --- a/.vsts-ci.yml +++ b/.vsts-ci.yml @@ -392,7 +392,7 @@ jobs: R_LIB_PATH=~/Rlib export R_LIBS=${R_LIB_PATH} mkdir -p ${R_LIB_PATH} - RDscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl'), lib = '${R_LIB_PATH}', dependencies = c('Depends', 'Imports', 'LinkingTo'), repos = 'https://cran.rstudio.com', Ncpus = parallel::detectCores())" || exit 1 + RDscript -e "install.packages(c('R6', 'data.table', 'DiagrammeR', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl'), lib = '${R_LIB_PATH}', dependencies = c('Depends', 'Imports', 'LinkingTo'), repos = 'https://cran.rstudio.com', Ncpus = parallel::detectCores())" || exit 1 sh build-cran-package.sh --r-executable=RD || exit 1 mv lightgbm_${LGB_VER}.tar.gz $(Build.ArtifactStagingDirectory)/lightgbm-${LGB_VER}-r-cran.tar.gz displayName: 'Build CRAN R-package' diff --git a/R-package/README.md b/R-package/README.md index f1821f5cc6be..8900f5c5ccec 100644 --- a/R-package/README.md +++ b/R-package/README.md @@ -428,7 +428,7 @@ docker run \ # install dependencies RDscript${R_CUSTOMIZATION} \ - -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl', 'testthat'), repos = 'https://cran.r-project.org', Ncpus = parallel::detectCores())" + -e "install.packages(c('R6', 'data.table', 'DiagrammeR', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl', 'testthat'), repos = 'https://cran.r-project.org', Ncpus = parallel::detectCores())" # install lightgbm sh build-cran-package.sh --r-executable=RD${R_CUSTOMIZATION} @@ -459,7 +459,7 @@ docker run \ -it \ wch1/r-debug -RDscriptvalgrind -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl', 'testthat'), repos = 'https://cran.rstudio.com', Ncpus = parallel::detectCores())" +RDscriptvalgrind -e "install.packages(c('R6', 'data.table', 'DiagrammeR', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl', 'testthat'), repos = 'https://cran.rstudio.com', Ncpus = parallel::detectCores())" sh build-cran-package.sh \ --r-executable=RDvalgrind From 2710705cef9bb4e0c6ec7489c9419ffe58d2922c Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 31 Dec 2024 13:05:01 -0600 Subject: [PATCH 8/8] Update R-package/DESCRIPTION --- R-package/DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 096265331d59..4f3730b25593 100755 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -50,7 +50,7 @@ Suggests: markdown, processx, RhpcBLASctl, - testthat, + testthat Depends: R (>= 3.5) Imports: