From 6862821b9ab004f6cffc215725ddb0bb153950b8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?F=C3=A9lix=20Boudry?= <felix.boudry@univ-perp.fr>
Date: Sat, 23 Nov 2024 21:41:15 +0100
Subject: [PATCH 1/8] Added lgb.plot.tree function. Added DiagrammeR to
 Suggests in DESCRIPTION. Added lgb.plot.tree in _pkgdown.yml. Roxygenized.

---
 R-package/DESCRIPTION          |   3 +-
 R-package/R/lgb.plot.tree.R    | 184 +++++++++++++++++++++++++++++++++
 R-package/man/lgb.plot.tree.Rd |  55 ++++++++++
 R-package/pkgdown/_pkgdown.yml |   1 +
 4 files changed, 242 insertions(+), 1 deletion(-)
 create mode 100644 R-package/R/lgb.plot.tree.R
 create mode 100644 R-package/man/lgb.plot.tree.Rd

diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION
index c9344ceebab7..6660a6e6ab49 100755
--- a/R-package/DESCRIPTION
+++ b/R-package/DESCRIPTION
@@ -49,7 +49,8 @@ Suggests:
     markdown,
     processx,
     RhpcBLASctl,
-    testthat
+    testthat,
+    DiagrammeR
 Depends:
     R (>= 3.5)
 Imports:
diff --git a/R-package/R/lgb.plot.tree.R b/R-package/R/lgb.plot.tree.R
new file mode 100644
index 000000000000..cecef3e99d78
--- /dev/null
+++ b/R-package/R/lgb.plot.tree.R
@@ -0,0 +1,184 @@
+#' @name lgb.plot.tree
+#' @title Plot a single LightGBM tree using DiagrammeR.
+#' @description The \code{lgb.plot.tree} function creates a DiagrammeR plot of a single LightGBM tree.
+#' @param model a \code{lgb.Booster} object.
+#' @param tree an integer specifying the tree to plot.
+#' @param rules a list of rules to replace the split values with feature levels.
+#'
+#' @return
+#' The \code{lgb.plot.tree} function creates a DiagrammeR plot.
+#'
+#' @details
+#' The \code{lgb.plot.tree} function creates a DiagrammeR plot of a single LightGBM tree. The tree is extracted from the model and displayed as a directed graph. The nodes are labelled with the feature, split value, gain, cover and value. The edges are labelled with the decision type and split value. The nodes are styled with a rectangle shape and filled with a beige colour. Leaf nodes are styled with an oval shape and filled with a khaki colour. The graph is rendered using the dot layout with a left-to-right rank direction. The nodes are coloured dim gray with a filled style and a Helvetica font. The edges are coloured dim gray with a solid style, a 1.5 arrow size, a vee arrowhead and a Helvetica font.
+#'
+#' @examples
+#' \donttest{
+#' # EXAMPLE: use the LightGBM example dataset to build a model with a single tree
+#' data(agaricus.train, package = "lightgbm")
+#' train <- agaricus.train
+#' dtrain <- lgb.Dataset(train$data, label = train$label)
+#' data(agaricus.test, package = "lightgbm")
+#' test <- agaricus.test
+#' dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
+#' # define model parameters and build a single tree
+#' params <- list(
+#'     objective = "regression",
+#'     metric = "l2",
+#'     min_data = 1L,
+#'     learning_rate = 1.0
+#' )
+#' valids <- list(test = dtest)
+#' model <- lgb.train(
+#'     params = params,
+#'     data = dtrain,
+#'     nrounds = 1L,
+#'     valids = valids,
+#'     early_stopping_rounds = 1L
+#' )
+#' # plot the tree and compare to the tree table
+#' # trees start from 0 in lgb.model.dt.tree
+#' tree_table <- lgb.model.dt.tree(model)
+#' lgb.plot.tree(model, 0)
+#' }
+#'
+#' @export
+
+# function to plot a single LightGBM tree using DiagrammeR
+lgb.plot.tree <- function(model = NULL, tree = NULL, rules = NULL) {
+    # check model is lgb.Booster
+    if (!inherits(model, "lgb.Booster")) {
+        stop("model: Has to be an object of class lgb.Booster")
+    }
+    # check DiagrammeR is available
+    if (!requireNamespace("DiagrammeR", quietly = TRUE)) {
+        stop("DiagrammeR package is required for lgb.plot.tree",
+            call. = FALSE
+        )
+    }
+    # tree must be numeric
+    if (!inherits(tree, "numeric")) {
+        stop("tree: Has to be an integer numeric")
+    }
+    # tree must be integer
+    if (tree %% 1 != 0) {
+        stop("tree: Has to be an integer numeric")
+    }
+    # extract data.table model structure
+    dt <- lgb.model.dt.tree(model)
+    # check that tree is less than or equal to the maximum tree index in the model
+    if (tree > max(dt$tree_index)) {
+        stop("tree: has to be less than the number of trees in the model")
+    }
+    # filter dt to just the rows for the selected tree
+    dt <- dt[tree_index == tree, ]
+    # change the column names to shorter more diagram friendly versions
+    data.table::setnames(dt, old = c("tree_index", "split_feature", "threshold", "split_gain"), new = c("Tree", "Feature", "Split", "Gain"))
+    dt[, Value := 0.0]
+    dt[, Value := leaf_value]
+    dt[is.na(Value), Value := internal_value]
+    dt[is.na(Gain), Gain := leaf_value]
+    dt[is.na(Feature), Feature := "Leaf"]
+    dt[, Cover := internal_count][Feature == "Leaf", Cover := leaf_count]
+    dt[, c("leaf_count", "internal_count", "leaf_value", "internal_value") := NULL]
+    dt[, Node := split_index]
+    max_node <- max(dt[["Node"]], na.rm = TRUE)
+    dt[is.na(Node), Node := max_node + leaf_index + 1]
+    dt[, ID := paste(Tree, Node, sep = "-")]
+    dt[, c("depth", "leaf_index") := NULL]
+    dt[, parent := node_parent][is.na(parent), parent := leaf_parent]
+    dt[, c("node_parent", "leaf_parent", "split_index") := NULL]
+    dt[, Yes := dt$ID[match(dt$Node, dt$parent)]]
+    dt <- dt[nrow(dt):1, ]
+    dt[, No := dt$ID[match(dt$Node, dt$parent)]]
+    # which way do the NA's go (this path will get a thicker arrow)
+    # for categorical features, NA gets put into the zero group
+    dt[default_left == TRUE, Missing := Yes]
+    dt[default_left == FALSE, Missing := No]
+    zero_present <- function(x) {
+        sapply(strsplit(as.character(x), "||", fixed = TRUE), function(el) {
+            any(el == "0")
+        })
+    }
+    dt[zero_present(Split), Missing := Yes]
+    # dt[, c('parent', 'default_left') := NULL]
+    # data.table::setcolorder(dt, c('Tree','Node','ID','Feature','decision_type','Split','Yes','No','Missing','Gain','Cover','Value'))
+    # create the label text
+    dt[, label := paste0(
+        Feature,
+        "\nCover: ", Cover,
+        ifelse(Feature == "Leaf", "", "\nGain: "), ifelse(Feature == "Leaf", "", round(Gain, 4)),
+        "\nValue: ", round(Value, 4)
+    )]
+    # style the nodes - same format as xgboost
+    dt[Node == 0, label := paste0("Tree ", Tree, "\n", label)]
+    dt[, shape := "rectangle"][Feature == "Leaf", shape := "oval"]
+    dt[, filledcolor := "Beige"][Feature == "Leaf", filledcolor := "Khaki"]
+    # in order to draw the first tree on top:
+    dt <- dt[order(-Tree)]
+    nodes <- DiagrammeR::create_node_df(
+        n         = nrow(dt),
+        ID        = dt$ID,
+        label     = dt$label,
+        fillcolor = dt$filledcolor,
+        shape     = dt$shape,
+        data      = dt$Feature,
+        fontcolor = "black"
+    )
+    # round the edge labels to 4 s.f. if they are numeric
+    # as otherwise get too many decimal places and the diagram looks bad
+    # would rather not use suppressWarnings
+    numeric_idx <- suppressWarnings(!is.na(as.numeric(dt[["Split"]])))
+    dt[numeric_idx, Split := round(as.numeric(Split), 4)]
+    # replace indices with feature levels if rules supplied
+    levels.to.names <- function(x, feature_name, rules) {
+        lvls <- sort(rules[[feature_name]])
+        result <- strsplit(x, "||", fixed = TRUE)
+        result <- lapply(result, as.numeric)
+        levels_to_names <- function(x) {
+            names(lvls)[as.numeric(x)]
+        }
+        result <- lapply(result, levels_to_names)
+        result <- lapply(result, paste, collapse = "\n")
+        result <- as.character(result)
+    }
+    if (!is.null(rules)) {
+        for (f in names(rules)) {
+            dt[Feature == f & decision_type == "==", Split := levels.to.names(Split, f, rules)]
+        }
+    }
+    # replace long split names with a message
+    dt[nchar(Split) > 500, Split := "Split too long to render"]
+    # create the edge labels
+    edges <- DiagrammeR::create_edge_df(
+        from = match(dt[Feature != "Leaf", c(ID)] %>% rep(2), dt$ID),
+        to = match(dt[Feature != "Leaf", c(Yes, No)], dt$ID),
+        label = dt[Feature != "Leaf", paste(decision_type, Split)] %>%
+            c(rep("", nrow(dt[Feature != "Leaf"]))),
+        style = dt[Feature != "Leaf", ifelse(Missing == Yes, "bold", "solid")] %>%
+            c(dt[Feature != "Leaf", ifelse(Missing == No, "bold", "solid")]),
+        rel = "leading_to"
+    )
+    # create the graph
+    graph <- DiagrammeR::create_graph(
+        nodes_df = nodes,
+        edges_df = edges,
+        attr_theme = NULL
+    ) %>%
+        DiagrammeR::add_global_graph_attrs(
+            attr_type = "graph",
+            attr = c("layout", "rankdir"),
+            value = c("dot", "LR")
+        ) %>%
+        DiagrammeR::add_global_graph_attrs(
+            attr_type = "node",
+            attr = c("color", "style", "fontname"),
+            value = c("DimGray", "filled", "Helvetica")
+        ) %>%
+        DiagrammeR::add_global_graph_attrs(
+            attr_type = "edge",
+            attr = c("color", "arrowsize", "arrowhead", "fontname"),
+            value = c("DimGray", "1.5", "vee", "Helvetica")
+        )
+    # render the graph
+    DiagrammeR::render_graph(graph)
+}
diff --git a/R-package/man/lgb.plot.tree.Rd b/R-package/man/lgb.plot.tree.Rd
new file mode 100644
index 000000000000..e48cfe420265
--- /dev/null
+++ b/R-package/man/lgb.plot.tree.Rd
@@ -0,0 +1,55 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/lgb.plot.tree.R
+\name{lgb.plot.tree}
+\alias{lgb.plot.tree}
+\title{Plot a single LightGBM tree using DiagrammeR.}
+\usage{
+lgb.plot.tree(model = NULL, tree = NULL, rules = NULL)
+}
+\arguments{
+\item{model}{a \code{lgb.Booster} object.}
+
+\item{tree}{an integer specifying the tree to plot.}
+
+\item{rules}{a list of rules to replace the split values with feature levels.}
+}
+\value{
+The \code{lgb.plot.tree} function creates a DiagrammeR plot.
+}
+\description{
+The \code{lgb.plot.tree} function creates a DiagrammeR plot of a single LightGBM tree.
+}
+\details{
+The \code{lgb.plot.tree} function creates a DiagrammeR plot of a single LightGBM tree. The tree is extracted from the model and displayed as a directed graph. The nodes are labelled with the feature, split value, gain, cover and value. The edges are labelled with the decision type and split value. The nodes are styled with a rectangle shape and filled with a beige colour. Leaf nodes are styled with an oval shape and filled with a khaki colour. The graph is rendered using the dot layout with a left-to-right rank direction. The nodes are coloured dim gray with a filled style and a Helvetica font. The edges are coloured dim gray with a solid style, a 1.5 arrow size, a vee arrowhead and a Helvetica font.
+}
+\examples{
+\donttest{
+# EXAMPLE: use the LightGBM example dataset to build a model with a single tree
+data(agaricus.train, package = "lightgbm")
+train <- agaricus.train
+dtrain <- lgb.Dataset(train$data, label = train$label)
+data(agaricus.test, package = "lightgbm")
+test <- agaricus.test
+dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
+# define model parameters and build a single tree
+params <- list(
+    objective = "regression",
+    metric = "l2",
+    min_data = 1L,
+    learning_rate = 1.0
+)
+valids <- list(test = dtest)
+model <- lgb.train(
+    params = params,
+    data = dtrain,
+    nrounds = 1L,
+    valids = valids,
+    early_stopping_rounds = 1L
+)
+# plot the tree and compare to the tree table
+# trees start from 0 in lgb.model.dt.tree
+tree_table <- lgb.model.dt.tree(model)
+lgb.plot.tree(model, 0)
+}
+
+}
diff --git a/R-package/pkgdown/_pkgdown.yml b/R-package/pkgdown/_pkgdown.yml
index c2d6718a2926..e2a6d7e6c7ac 100644
--- a/R-package/pkgdown/_pkgdown.yml
+++ b/R-package/pkgdown/_pkgdown.yml
@@ -97,6 +97,7 @@ reference:
     - '`lgb.interprete`'
     - '`lgb.plot.importance`'
     - '`lgb.plot.interpretation`'
+    - '`lgb.plot.tree`'
     - '`print.lgb.Booster`'
     - '`summary.lgb.Booster`'
   - title: Multithreading Control

From 0a7ea0e433c067b633913694d30b4917336f9db9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?F=C3=A9lix=20Boudry?= <felix.boudry@univ-perp.fr>
Date: Sat, 23 Nov 2024 23:00:51 +0100
Subject: [PATCH 2/8] Added tests.

---
 R-package/NAMESPACE                           |  1 +
 R-package/tests/testthat/test_lgb.plot.tree.R | 59 +++++++++++++++++++
 2 files changed, 60 insertions(+)
 create mode 100644 R-package/tests/testthat/test_lgb.plot.tree.R

diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE
index 49ef2b5cb8fc..4f5c308ac3df 100644
--- a/R-package/NAMESPACE
+++ b/R-package/NAMESPACE
@@ -29,6 +29,7 @@ export(lgb.make_serializable)
 export(lgb.model.dt.tree)
 export(lgb.plot.importance)
 export(lgb.plot.interpretation)
+export(lgb.plot.tree)
 export(lgb.restore_handle)
 export(lgb.save)
 export(lgb.slice.Dataset)
diff --git a/R-package/tests/testthat/test_lgb.plot.tree.R b/R-package/tests/testthat/test_lgb.plot.tree.R
new file mode 100644
index 000000000000..64b462e186ec
--- /dev/null
+++ b/R-package/tests/testthat/test_lgb.plot.tree.R
@@ -0,0 +1,59 @@
+test_that("lgb.plot.tree works as expected"){
+  data(agaricus.train, package = "lightgbm")
+  train <- agaricus.train
+  dtrain <- lgb.Dataset(train$data, label = train$label)
+  data(agaricus.test, package = "lightgbm")
+  test <- agaricus.test
+  dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
+  # define model parameters and build a single tree
+  params <- list(
+    objective = "regression"
+    , metric = "l2"
+    , min_data = 1L
+    , learning_rate = 1.0
+  )
+  valids <- list(test = dtest)
+  model <- lgb.train(
+    params = params
+    , data = dtrain
+    , nrounds = 1L
+    , valids = valids
+    , early_stopping_rounds = 1L
+  )
+  # plot the tree and compare to the tree table
+  # trees start from 0 in lgb.model.dt.tree
+  tree_table <- lgb.model.dt.tree(model)
+  expect_true({
+    lgb.plot.tree(model, 0)TRUE
+  })
+}
+
+test_that("lgb.plot.tree fails when a non existing tree is selected"){
+  data(agaricus.train, package = "lightgbm")
+  train <- agaricus.train
+  dtrain <- lgb.Dataset(train$data, label = train$label)
+  data(agaricus.test, package = "lightgbm")
+  test <- agaricus.test
+  dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
+  # define model parameters and build a single tree
+  params <- list(
+    objective = "regression"
+    , metric = "l2"
+    , min_data = 1L
+    , learning_rate = 1.0
+  )
+  valids <- list(test = dtest)
+  model <- lgb.train(
+    params = params
+    , data = dtrain
+    , nrounds = 1L
+    , valids = valids
+    , early_stopping_rounds = 1L
+  )
+  # plot the tree and compare to the tree table
+  # trees start from 0 in lgb.model.dt.tree
+  tree_table <- lgb.model.dt.tree(model)
+  expect_error({
+    lgb.plot.tree(model, 999)TRUE
+  })
+}

From 757dc847288886287a76eb7f2d076e01d3104ac7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?F=C3=A9lix=20Boudry?= <felix.boudry@univ-perp.fr>
Date: Tue, 24 Dec 2024 19:58:02 +0100
Subject: [PATCH 3/8] Added review suggestions. DiagrammeR in CI. Error
 messages. Default parameters. Changed tests.

---
 .ci/test-r-package-windows.ps1                |   2 +-
 R-package/DESCRIPTION                         |   2 +-
 R-package/R/lgb.plot.tree.R                   | 199 +++++++++---------
 R-package/tests/testthat/test_lgb.plot.tree.R |  44 ++--
 4 files changed, 119 insertions(+), 128 deletions(-)

diff --git a/.ci/test-r-package-windows.ps1 b/.ci/test-r-package-windows.ps1
index a3f524b60be7..857b2789cbbc 100644
--- a/.ci/test-r-package-windows.ps1
+++ b/.ci/test-r-package-windows.ps1
@@ -177,7 +177,7 @@ Write-Output "Done installing CMake"
 
 Write-Output "Installing dependencies"
 $packages = -join @(
-    "c('data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'processx', 'R6', 'RhpcBLASctl', 'testthat'), ",
+    "c('data.table', 'DiagrammeR', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'processx', 'R6', 'RhpcBLASctl', 'testthat'), ",
     "dependencies = c('Imports', 'Depends', 'LinkingTo')"
 )
 $params = -join @(
diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION
index 6660a6e6ab49..096265331d59 100755
--- a/R-package/DESCRIPTION
+++ b/R-package/DESCRIPTION
@@ -45,12 +45,12 @@ NeedsCompilation: yes
 Biarch: true
 VignetteBuilder: knitr
 Suggests:
+    DiagrammeR,
     knitr,
     markdown,
     processx,
     RhpcBLASctl,
     testthat,
-    DiagrammeR
 Depends:
     R (>= 3.5)
 Imports:
diff --git a/R-package/R/lgb.plot.tree.R b/R-package/R/lgb.plot.tree.R
index cecef3e99d78..c7ee06c5abbd 100644
--- a/R-package/R/lgb.plot.tree.R
+++ b/R-package/R/lgb.plot.tree.R
@@ -1,15 +1,15 @@
 #' @name lgb.plot.tree
-#' @title Plot a single LightGBM tree using DiagrammeR.
+#' @title Plot a single LightGBM tree.
 #' @description The \code{lgb.plot.tree} function creates a DiagrammeR plot of a single LightGBM tree.
 #' @param model a \code{lgb.Booster} object.
-#' @param tree an integer specifying the tree to plot.
+#' @param tree an integer specifying the tree to plot. This is 1-based, so e.g. a value of '7' means 'the 7th tree' (tree_index=6 in LightGBM's underlying representation).
 #' @param rules a list of rules to replace the split values with feature levels.
 #'
 #' @return
 #' The \code{lgb.plot.tree} function creates a DiagrammeR plot.
 #'
 #' @details
-#' The \code{lgb.plot.tree} function creates a DiagrammeR plot of a single LightGBM tree. The tree is extracted from the model and displayed as a directed graph. The nodes are labelled with the feature, split value, gain, cover and value. The edges are labelled with the decision type and split value. The nodes are styled with a rectangle shape and filled with a beige colour. Leaf nodes are styled with an oval shape and filled with a khaki colour. The graph is rendered using the dot layout with a left-to-right rank direction. The nodes are coloured dim gray with a filled style and a Helvetica font. The edges are coloured dim gray with a solid style, a 1.5 arrow size, a vee arrowhead and a Helvetica font.
+#' The \code{lgb.plot.tree} function creates a DiagrammeR plot of a single LightGBM tree. The tree is extracted from the model and displayed as a directed graph. The nodes are labelled with the feature, split value, gain, cover and value. The edges are labelled with the decision type and split value.
 #'
 #' @examples
 #' \donttest{
@@ -23,9 +23,7 @@
 #' # define model parameters and build a single tree
 #' params <- list(
 #'     objective = "regression",
-#'     metric = "l2",
 #'     min_data = 1L,
-#'     learning_rate = 1.0
 #' )
 #' valids <- list(test = dtest)
 #' model <- lgb.train(
@@ -43,142 +41,151 @@
 #'
 #' @export
 
-# function to plot a single LightGBM tree using DiagrammeR
 lgb.plot.tree <- function(model = NULL, tree = NULL, rules = NULL) {
     # check model is lgb.Booster
-    if (!inherits(model, "lgb.Booster")) {
-        stop("model: Has to be an object of class lgb.Booster")
+    if (!.is_Booster(x = model)) {
+        stop("lgb.plot.tree: model should be an ", sQuote("lgb.Booster"))
     }
-    # check DiagrammeR is available
     if (!requireNamespace("DiagrammeR", quietly = TRUE)) {
-        stop("DiagrammeR package is required for lgb.plot.tree",
+        stop("lgb.plot.tree: DiagrammeR package is required",
             call. = FALSE
         )
     }
     # tree must be numeric
     if (!inherits(tree, "numeric")) {
-        stop("tree: Has to be an integer numeric")
+        stop("lgb.plot.tree: Has to be an integer numeric")
     }
     # tree must be integer
     if (tree %% 1 != 0) {
-        stop("tree: Has to be an integer numeric")
+        stop("lgb.plot.tree: Has to be an integer numeric")
     }
     # extract data.table model structure
-    dt <- lgb.model.dt.tree(model)
+    modelDT <- lgb.model.dt.tree(model)
     # check that tree is less than or equal to the maximum tree index in the model
-    if (tree > max(dt$tree_index)) {
-        stop("tree: has to be less than the number of trees in the model")
+    if (tree > max(modelDT$tree_index)) {  # reject indices beyond the last tree
+        stop("lgb.plot.tree: Value of 'tree' should be between 1 and the total number of trees in the model (", max(modelDT$tree_index), "). Got: ", tree, ".")
     }
-    # filter dt to just the rows for the selected tree
-    dt <- dt[tree_index == tree, ]
+    # filter modelDT to just the rows for the selected tree
+    modelDT <- modelDT[tree_index == tree, ]
     # change the column names to shorter more diagram friendly versions
-    data.table::setnames(dt, old = c("tree_index", "split_feature", "threshold", "split_gain"), new = c("Tree", "Feature", "Split", "Gain"))
-    dt[, Value := 0.0]
-    dt[, Value := leaf_value]
-    dt[is.na(Value), Value := internal_value]
-    dt[is.na(Gain), Gain := leaf_value]
-    dt[is.na(Feature), Feature := "Leaf"]
-    dt[, Cover := internal_count][Feature == "Leaf", Cover := leaf_count]
-    dt[, c("leaf_count", "internal_count", "leaf_value", "internal_value") := NULL]
-    dt[, Node := split_index]
-    max_node <- max(dt[["Node"]], na.rm = TRUE)
-    dt[is.na(Node), Node := max_node + leaf_index + 1]
-    dt[, ID := paste(Tree, Node, sep = "-")]
-    dt[, c("depth", "leaf_index") := NULL]
-    dt[, parent := node_parent][is.na(parent), parent := leaf_parent]
-    dt[, c("node_parent", "leaf_parent", "split_index") := NULL]
-    dt[, Yes := dt$ID[match(dt$Node, dt$parent)]]
-    dt <- dt[nrow(dt):1, ]
-    dt[, No := dt$ID[match(dt$Node, dt$parent)]]
+    data.table::setnames(modelDT, old = c("tree_index", "split_feature", "threshold", "split_gain"), new = c("Tree", "Feature", "Split", "Gain"))
+    modelDT[, Value := 0.0]
+    modelDT[, Value := leaf_value]
+    modelDT[is.na(Value), Value := internal_value]
+    modelDT[is.na(Gain), Gain := leaf_value]
+    modelDT[is.na(Feature), Feature := "Leaf"]
+    modelDT[, Cover := internal_count][Feature == "Leaf", Cover := leaf_count]
+    modelDT[, c("leaf_count", "internal_count", "leaf_value", "internal_value") := NULL]
+    modelDT[, Node := split_index]
+    max_node <- max(modelDT[["Node"]], na.rm = TRUE)
+    modelDT[is.na(Node), Node := max_node + leaf_index + 1]
+    modelDT[, ID := paste(Tree, Node, sep = "-")]
+    modelDT[, c("depth", "leaf_index") := NULL]
+    modelDT[, parent := node_parent][is.na(parent), parent := leaf_parent]
+    modelDT[, c("node_parent", "leaf_parent", "split_index") := NULL]
+    modelDT[, Yes := modelDT$ID[match(modelDT$Node, modelDT$parent)]]
+    modelDT <- modelDT[nrow(modelDT):1, ]
+    modelDT[, No := modelDT$ID[match(modelDT$Node, modelDT$parent)]]
     # which way do the NA's go (this path will get a thicker arrow)
     # for categorical features, NA gets put into the zero group
-    dt[default_left == TRUE, Missing := Yes]
-    dt[default_left == FALSE, Missing := No]
-    zero_present <- function(x) {
-        sapply(strsplit(as.character(x), "||", fixed = TRUE), function(el) {
-            any(el == "0")
-        })
-    }
-    dt[zero_present(Split), Missing := Yes]
-    # dt[, c('parent', 'default_left') := NULL]
-    # data.table::setcolorder(dt, c('Tree','Node','ID','Feature','decision_type','Split','Yes','No','Missing','Gain','Cover','Value'))
+    modelDT[default_left == TRUE, Missing := Yes]
+    modelDT[default_left == FALSE, Missing := No]
+    modelDT[.zero_present(Split), Missing := Yes]
+    # modelDT[, c('parent', 'default_left') := NULL]
+    # data.table::setcolorder(modelDT, c('Tree','Node','ID','Feature','decision_type','Split','Yes','No','Missing','Gain','Cover','Value'))
     # create the label text
-    dt[, label := paste0(
+    modelDT[, label := paste0(
         Feature,
         "\nCover: ", Cover,
         ifelse(Feature == "Leaf", "", "\nGain: "), ifelse(Feature == "Leaf", "", round(Gain, 4)),
         "\nValue: ", round(Value, 4)
     )]
     # style the nodes - same format as xgboost
-    dt[Node == 0, label := paste0("Tree ", Tree, "\n", label)]
-    dt[, shape := "rectangle"][Feature == "Leaf", shape := "oval"]
-    dt[, filledcolor := "Beige"][Feature == "Leaf", filledcolor := "Khaki"]
+    modelDT[Node == 0, label := paste0("Tree ", Tree, "\n", label)]
+    modelDT[, shape := "rectangle"][Feature == "Leaf", shape := "oval"]
+    modelDT[, filledcolor := "Beige"][Feature == "Leaf", filledcolor := "Khaki"]
     # in order to draw the first tree on top:
-    dt <- dt[order(-Tree)]
+    modelDT <- modelDT[order(-Tree)]
     nodes <- DiagrammeR::create_node_df(
-        n         = nrow(dt),
-        ID        = dt$ID,
-        label     = dt$label,
-        fillcolor = dt$filledcolor,
-        shape     = dt$shape,
-        data      = dt$Feature,
-        fontcolor = "black"
+        n         = nrow(modelDT)
+        , ID        = modelDT$ID
+        , label     = modelDT$label
+        , fillcolor = modelDT$filledcolor
+        , shape     = modelDT$shape
+        , data      = modelDT$Feature
+        , fontcolor = "black"
     )
     # round the edge labels to 4 s.f. if they are numeric
     # as otherwise get too many decimal places and the diagram looks bad
     # would rather not use suppressWarnings
-    numeric_idx <- suppressWarnings(!is.na(as.numeric(dt[["Split"]])))
-    dt[numeric_idx, Split := round(as.numeric(Split), 4)]
+    numeric_idx <- suppressWarnings(!is.na(as.numeric(modelDT[["Split"]])))
+    modelDT[numeric_idx, Split := round(as.numeric(Split), 4)]
     # replace indices with feature levels if rules supplied
-    levels.to.names <- function(x, feature_name, rules) {
-        lvls <- sort(rules[[feature_name]])
-        result <- strsplit(x, "||", fixed = TRUE)
-        result <- lapply(result, as.numeric)
-        levels_to_names <- function(x) {
-            names(lvls)[as.numeric(x)]
-        }
-        result <- lapply(result, levels_to_names)
-        result <- lapply(result, paste, collapse = "\n")
-        result <- as.character(result)
-    }
+    
     if (!is.null(rules)) {
         for (f in names(rules)) {
-            dt[Feature == f & decision_type == "==", Split := levels.to.names(Split, f, rules)]
+            modelDT[Feature == f & decision_type == "==", Split := .levels.to.names(Split, f, rules)]
         }
     }
     # replace long split names with a message
-    dt[nchar(Split) > 500, Split := "Split too long to render"]
+    modelDT[nchar(Split) > 500, Split := "Split too long to render"]
     # create the edge labels
     edges <- DiagrammeR::create_edge_df(
-        from = match(dt[Feature != "Leaf", c(ID)] %>% rep(2), dt$ID),
-        to = match(dt[Feature != "Leaf", c(Yes, No)], dt$ID),
-        label = dt[Feature != "Leaf", paste(decision_type, Split)] %>%
-            c(rep("", nrow(dt[Feature != "Leaf"]))),
-        style = dt[Feature != "Leaf", ifelse(Missing == Yes, "bold", "solid")] %>%
-            c(dt[Feature != "Leaf", ifelse(Missing == No, "bold", "solid")]),
+        from = match(modelDT[Feature != "Leaf", c(ID)] %>% rep(2), modelDT$ID),
+        to = match(modelDT[Feature != "Leaf", c(Yes, No)], modelDT$ID),
+        label = modelDT[Feature != "Leaf", paste(decision_type, Split)] %>%
+            c(rep("", nrow(modelDT[Feature != "Leaf"]))),
+        style = modelDT[Feature != "Leaf", ifelse(Missing == Yes, "bold", "solid")] %>%
+            c(modelDT[Feature != "Leaf", ifelse(Missing == No, "bold", "solid")]),
         rel = "leading_to"
     )
     # create the graph
     graph <- DiagrammeR::create_graph(
-        nodes_df = nodes,
-        edges_df = edges,
-        attr_theme = NULL
-    ) %>%
-        DiagrammeR::add_global_graph_attrs(
-            attr_type = "graph",
-            attr = c("layout", "rankdir"),
-            value = c("dot", "LR")
-        ) %>%
-        DiagrammeR::add_global_graph_attrs(
-            attr_type = "node",
-            attr = c("color", "style", "fontname"),
-            value = c("DimGray", "filled", "Helvetica")
-        ) %>%
-        DiagrammeR::add_global_graph_attrs(
-            attr_type = "edge",
-            attr = c("color", "arrowsize", "arrowhead", "fontname"),
-            value = c("DimGray", "1.5", "vee", "Helvetica")
+        nodes_df = nodes
+        , edges_df = edges
+        , attr_theme = NULL
+    )
+    graph <- DiagrammeR::add_global_graph_attrs(
+        graph = graph
+        , attr_type = "graph"
+        , attr = c("layout", "rankdir")
+        , value = c("dot", "LR")
         )
+    graph <- DiagrammeR::add_global_graph_attrs(
+        graph = graph
+        , attr_type = "node"
+        , attr = c("color", "style", "fontname")
+        , value = c("DimGray", "filled", "Helvetica")
+    )
+    graph <- DiagrammeR::add_global_graph_attrs(
+        graph = graph
+        , attr_type = "edge"
+        , attr = c("color", "arrowsize", "arrowhead", "fontname")
+        , value = c("DimGray", "1.5", "vee", "Helvetica")
+    )
     # render the graph
     DiagrammeR::render_graph(graph)
+    return(invisible(NULL))
 }
+
+.zero_present <- function(x) {
+    # TRUE for each split whose "||"-separated categories include "0"
+    categories <- strsplit(as.character(x), "||", fixed = TRUE)
+    return(vapply(categories, function(el) any(el == "0"), logical(1L)))
+}
+
+# Replace "||"-separated category indices in `x` by the names of the
+# sorted levels stored in rules[[feature_name]], one name per line.
+.levels.to.names <- function(x, feature_name, rules) {
+    lvls <- sort(rules[[feature_name]])
+    result <- strsplit(x, "||", fixed = TRUE)
+    result <- lapply(result, as.numeric)
+    result <- lapply(result, .levels_to_names, lvls = lvls)
+    result <- lapply(result, paste, collapse = "\n")
+    return(as.character(result))
+}
+
+# `lvls` is passed explicitly: it is a local of the caller, not a global.
+.levels_to_names <- function(x, lvls = NULL) {
+    return(names(lvls)[as.numeric(x)])
+}
\ No newline at end of file
diff --git a/R-package/tests/testthat/test_lgb.plot.tree.R b/R-package/tests/testthat/test_lgb.plot.tree.R
index 64b462e186ec..c53cf3422090 100644
--- a/R-package/tests/testthat/test_lgb.plot.tree.R
+++ b/R-package/tests/testthat/test_lgb.plot.tree.R
@@ -2,58 +2,42 @@ test_that("lgb.plot.tree works as expected"){
   data(agaricus.train, package = "lightgbm")
   train <- agaricus.train
   dtrain <- lgb.Dataset(train$data, label = train$label)
-  data(agaricus.test, package = "lightgbm")
-  test <- agaricus.test
-  dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
   # define model parameters and build a single tree
-  params <- list(
-    objective = "regression"
-    , metric = "l2"
-    , min_data = 1L
-    , learning_rate = 1.0
-  )
-  valids <- list(test = dtest)
   model <- lgb.train(
-    params = params
+    params = list(
+        objective = "regression"
+        , num_threads = .LGB_MAX_THREADS
+    )
     , data = dtrain
     , nrounds = 1L
-    , valids = valids
-    , early_stopping_rounds = 1L
+    , verbose = .LGB_VERBOSITY
   )
   # plot the tree and compare to the tree table
   # trees start from 0 in lgb.model.dt.tree
   tree_table <- lgb.model.dt.tree(model)
   expect_true({
-    lgb.plot.tree(model, 0)TRUE
-  })
+    lgb.plot.tree(model, 0)
+  }, regexp = "lgb.plot.tree: Value of 'tree' should be between 1 and the total number of trees in the model")
 }
 
 test_that("lgb.plot.tree fails when a non existing tree is selected"){
   data(agaricus.train, package = "lightgbm")
   train <- agaricus.train
   dtrain <- lgb.Dataset(train$data, label = train$label)
-  data(agaricus.test, package = "lightgbm")
-  test <- agaricus.test
-  dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label)
   # define model parameters and build a single tree
-  params <- list(
-    objective = "regression"
-    , metric = "l2"
-    , min_data = 1L
-    , learning_rate = 1.0
-  )
-  valids <- list(test = dtest)
   model <- lgb.train(
-    params = params
+    params = list(
+        objective = "regression"
+        , num_threads = .LGB_MAX_THREADS
+    )
     , data = dtrain
     , nrounds = 1L
-    , valids = valids
-    , early_stopping_rounds = 1L
+    , verbose = .LGB_VERBOSITY
   )
   # plot the tree and compare to the tree table
   # trees start from 0 in lgb.model.dt.tree
   tree_table <- lgb.model.dt.tree(model)
   expect_error({
-    lgb.plot.tree(model, 999)TRUE
-  })
+    lgb.plot.tree(model, 999)
+  }, regexp = "lgb.plot.tree: Value of 'tree' should be between 1 and the total number of trees in the model")
 }

From 55aba68c7fac9273fc9e125eac8faa0aef222c41 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?F=C3=A9lix=20Boudry?= <felix.boudry@univ-perp.fr>
Date: Wed, 25 Dec 2024 17:34:42 +0100
Subject: [PATCH 4/8] Updated tests (based on
 R-package/tests/testthat/test_lgb.model.dt.tree.R). Now tests regression,
 binary and multiclass classification, and ranking.

---
 R-package/tests/testthat/test_lgb.plot.tree.R | 137 +++++++++++++-----
 1 file changed, 98 insertions(+), 39 deletions(-)

diff --git a/R-package/tests/testthat/test_lgb.plot.tree.R b/R-package/tests/testthat/test_lgb.plot.tree.R
index c53cf3422090..54440b9825dd 100644
--- a/R-package/tests/testthat/test_lgb.plot.tree.R
+++ b/R-package/tests/testthat/test_lgb.plot.tree.R
@@ -1,43 +1,102 @@
-test_that("lgb.plot.tree works as expected"){
-  data(agaricus.train, package = "lightgbm")
-  train <- agaricus.train
-  dtrain <- lgb.Dataset(train$data, label = train$label)
-  # define model parameters and build a single tree
-  model <- lgb.train(
-    params = list(
-        objective = "regression"
-        , num_threads = .LGB_MAX_THREADS
-    )
-    , data = dtrain
-    , nrounds = 1L
-    , verbose = .LGB_VERBOSITY
+NROUNDS <- 10L
+MAX_DEPTH <- 3L
+N <- nrow(iris)
+X <- data.matrix(iris[2L:4L])
+FEAT <- colnames(X)
+NCLASS <- nlevels(iris[, 5L])
+
+model_reg <- lgb.train(
+  params = list(
+    objective = "regression"
+    , num_threads = .LGB_MAX_THREADS
+    , max.depth = MAX_DEPTH
   )
-  # plot the tree and compare to the tree table
-  # trees start from 0 in lgb.model.dt.tree
-  tree_table <- lgb.model.dt.tree(model)
-  expect_true({
-    lgb.plot.tree(model, 0)
-  }, regexp = "lgb.plot.tree: Value of 'tree' should be between 1 and the total number of trees in the model")
-}
+  , data = lgb.Dataset(X, label = iris[, 1L])
+  , verbose = .LGB_VERBOSITY
+  , nrounds = NROUNDS
+)
+
+model_binary <- lgb.train(
+  params = list(
+    objective = "binary"
+    , num_threads = .LGB_MAX_THREADS
+    , max.depth = MAX_DEPTH
+  )
+  , data = lgb.Dataset(X, label = iris[, 5L] == "setosa")
+  , verbose = .LGB_VERBOSITY
+  , nrounds = NROUNDS
+)
+
+model_multiclass <- lgb.train(
+  params = list(
+    objective = "multiclass"
+    , num_threads = .LGB_MAX_THREADS
+    , max.depth = MAX_DEPTH
+    , num_classes = NCLASS
+  )
+  , data = lgb.Dataset(X, label = as.integer(iris[, 5L]) - 1L)
+  , verbose = .LGB_VERBOSITY
+  , nrounds = NROUNDS
+)
 
-test_that("lgb.plot.tree fails when a non existing tree is selected"){
-  data(agaricus.train, package = "lightgbm")
-  train <- agaricus.train
-  dtrain <- lgb.Dataset(train$data, label = train$label)
-  # define model parameters and build a single tree
-  model <- lgb.train(
-    params = list(
-        objective = "regression"
-        , num_threads = .LGB_MAX_THREADS
-    )
-    , data = dtrain
-    , nrounds = 1L
-    , verbose = .LGB_VERBOSITY
+model_rank <- lgb.train(
+  params = list(
+    objective = "lambdarank"
+    , num_threads = .LGB_MAX_THREADS
+    , max.depth = MAX_DEPTH
+    , lambdarank_truncation_level = 3L
   )
-  # plot the tree and compare to the tree table
-  # trees start from 0 in lgb.model.dt.tree
-  tree_table <- lgb.model.dt.tree(model)
-  expect_error({
-    lgb.plot.tree(model, 999)
-  }, regexp = "lgb.plot.tree: Value of 'tree' should be between 1 and the total number of trees in the model")
+  , data = lgb.Dataset(
+    X
+    , label = as.integer(iris[, 1L] > 5.8)
+    , group = rep(10L, times = 15L)
+  )
+  , verbose = .LGB_VERBOSITY
+  , nrounds = NROUNDS
+)
+
+models <- list(
+  reg = model_reg
+  , bin = model_binary
+  , multi = model_multiclass
+  , rank = model_rank
+)
+
+for (model_name in names(models)){
+  model <- models[[model_name]]
+  expected_n_trees <- NROUNDS
+  if (model_name == "multi") {
+    expected_n_trees <- NROUNDS * NCLASS
+  }
+  df <- as.data.frame(lgb.model.dt.tree(model))
+  df_list <- split(df, f = df$tree_index, drop = TRUE)
+  df_leaf <- df[!is.na(df$leaf_index), ]
+  df_internal <- df[is.na(df$leaf_index), ]
+
+  test_that("lgb.plot.tree fails when a non existing tree is selected", {
+    expect_error({
+      lgb.plot.tree(model, 0)
+    }, regexp = "lgb.plot.tree: Value of 'tree' should be between 1 and the total number of trees in the model")
+  })
+  test_that("lgb.plot.tree fails when a non existing tree is selected", {
+    expect_error({
+      lgb.plot.tree(model, 999)
+    }, regexp = "lgb.plot.tree: Value of 'tree' should be between 1 and the total number of trees in the model")
+  })
+  test_that("lgb.plot.tree fails when a non numeric tree is selected", {
+    expect_error({
+      lgb.plot.tree(model, "a")
+    }, regexp = "lgb.plot.tree: Has to be an integer numeric")
+  })
+  test_that("lgb.plot.tree fails when a non integer tree is selected", {
+    expect_error({
+      lgb.plot.tree(model, 1.5)
+    }, regexp = "lgb.plot.tree: Has to be an integer numeric")
+  })
+  test_that("lgb.plot.tree fails when a non lgb.Booster model is passed", {
+    expect_error({
+      lgb.plot.tree(1, 0)
+    }, regexp = "lgb.plot.tree: model should be an 'lgb.Booster'")
+  })
 }
+

From 85ff97aa733001c06085c6057422d4b8bc582aa4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?F=C3=A9lix=20Boudry?= <felix.boudry@univ-perp.fr>
Date: Wed, 25 Dec 2024 18:57:34 +0100
Subject: [PATCH 5/8] Corrected error (misplaced parenthesis and comma) in the
 selected tree check (L66). Added explanatory comments to the code.

---
 R-package/R/lgb.plot.tree.R | 32 ++++++++++++++++++++++----------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/R-package/R/lgb.plot.tree.R b/R-package/R/lgb.plot.tree.R
index c7ee06c5abbd..c020977fda5f 100644
--- a/R-package/R/lgb.plot.tree.R
+++ b/R-package/R/lgb.plot.tree.R
@@ -40,12 +40,12 @@
 #' }
 #'
 #' @export
-
 lgb.plot.tree <- function(model = NULL, tree = NULL, rules = NULL) {
     # check model is lgb.Booster
     if (!.is_Booster(x = model)) {
         stop("lgb.plot.tree: model should be an ", sQuote("lgb.Booster"))
     }
+    # check DiagrammeR is available
     if (!requireNamespace("DiagrammeR", quietly = TRUE)) {
         stop("lgb.plot.tree: DiagrammeR package is required",
             call. = FALSE
@@ -63,26 +63,36 @@ lgb.plot.tree <- function(model = NULL, tree = NULL, rules = NULL) {
     modelDT <- lgb.model.dt.tree(model)
     # check that tree is less than or equal to the maximum tree index in the model
     if (tree > max(modelDT$tree_index)) {
-        stop("lgb.plot.tree: Value of 'tree' should be between 1 and the total number of trees in the model (", max(modelDT$tree_index, "). Got: ," tree, ".")
+        stop("lgb.plot.tree: Value of 'tree' should be between 1 and the total number of trees in the model (", max(modelDT$tree_index), "). Got: ", tree, ".")
     }
     # filter modelDT to just the rows for the selected tree
     modelDT <- modelDT[tree_index == tree, ]
     # change the column names to shorter more diagram friendly versions
-    data.table::setnames(modelDT, old = c("tree_index", "split_feature", "threshold", "split_gain"), new = c("Tree", "Feature", "Split", "Gain"))
-    modelDT[, Value := 0.0]
+    data.table::setnames(modelDT
+    , old = c("tree_index", "split_feature", "threshold", "split_gain")
+    , new = c("Tree", "Feature", "Split", "Gain"))
+    # assign leaf_value to the Value column in modelDT
     modelDT[, Value := leaf_value]
+    # assign new values if NA
     modelDT[is.na(Value), Value := internal_value]
     modelDT[is.na(Gain), Gain := leaf_value]
     modelDT[is.na(Feature), Feature := "Leaf"]
+    # assign internal_count to Cover, and if Feature is "Leaf", assign leaf_count to Cover
     modelDT[, Cover := internal_count][Feature == "Leaf", Cover := leaf_count]
+    # remove unnecessary columns
     modelDT[, c("leaf_count", "internal_count", "leaf_value", "internal_value") := NULL]
+    # assign split_index to Node
     modelDT[, Node := split_index]
+    # find the maximum value of Node, if Node is NA, assign max_node + leaf_index + 1 to Node
     max_node <- max(modelDT[["Node"]], na.rm = TRUE)
     modelDT[is.na(Node), Node := max_node + leaf_index + 1]
+    # adding ID column
     modelDT[, ID := paste(Tree, Node, sep = "-")]
+    # remove unnecessary columns
     modelDT[, c("depth", "leaf_index") := NULL]
     modelDT[, parent := node_parent][is.na(parent), parent := leaf_parent]
     modelDT[, c("node_parent", "leaf_parent", "split_index") := NULL]
+    # assign the IDs of the matching parent nodes to Yes and No
     modelDT[, Yes := modelDT$ID[match(modelDT$Node, modelDT$parent)]]
     modelDT <- modelDT[nrow(modelDT):1, ]
     modelDT[, No := modelDT$ID[match(modelDT$Node, modelDT$parent)]]
@@ -91,14 +101,16 @@ lgb.plot.tree <- function(model = NULL, tree = NULL, rules = NULL) {
     modelDT[default_left == TRUE, Missing := Yes]
     modelDT[default_left == FALSE, Missing := No]
     modelDT[.zero_present(Split), Missing := Yes]
-    # modelDT[, c('parent', 'default_left') := NULL]
-    # data.table::setcolorder(modelDT, c('Tree','Node','ID','Feature','decision_type','Split','Yes','No','Missing','Gain','Cover','Value'))
     # create the label text
     modelDT[, label := paste0(
-        Feature,
-        "\nCover: ", Cover,
-        ifelse(Feature == "Leaf", "", "\nGain: "), ifelse(Feature == "Leaf", "", round(Gain, 4)),
-        "\nValue: ", round(Value, 4)
+        Feature
+        , "\nCover: "
+        , Cover
+        # the Gain line is emitted for split nodes only; leaf rows get empty strings
+        , ifelse(Feature == "Leaf", "", "\nGain: ")
+        , ifelse(Feature == "Leaf", "", round(Gain, 4))
+        , "\nValue: "
+        , round(Value, 4)
     )]
     # style the nodes - same format as xgboost
     modelDT[Node == 0, label := paste0("Tree ", Tree, "\n", label)]

From b4b648ab6522d305ce9e2c9f30c494c82b4e285f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?F=C3=A9lix=20Boudry?= <felix.boudry@univ-perp.fr>
Date: Wed, 25 Dec 2024 23:29:58 +0100
Subject: [PATCH 6/8] Corrected tests. Added a warning to functions and shorter
 stop message to make tests work.

---
 R-package/R/lgb.plot.tree.R                   |  5 +++--
 R-package/tests/testthat/test_lgb.plot.tree.R | 15 ++++-----------
 2 files changed, 7 insertions(+), 13 deletions(-)

diff --git a/R-package/R/lgb.plot.tree.R b/R-package/R/lgb.plot.tree.R
index c020977fda5f..5df73f2de17e 100644
--- a/R-package/R/lgb.plot.tree.R
+++ b/R-package/R/lgb.plot.tree.R
@@ -62,8 +62,9 @@ lgb.plot.tree <- function(model = NULL, tree = NULL, rules = NULL) {
     # extract data.table model structure
     modelDT <- lgb.model.dt.tree(model)
     # check that tree is less than or equal to the maximum tree index in the model
-    if (tree > max(modelDT$tree_index)) {
-        stop("lgb.plot.tree: Value of 'tree' should be between 1 and the total number of trees in the model (", max(modelDT$tree_index), "). Got: ", tree, ".")
+    if (tree > max(modelDT$tree_index) || tree < 1) {
+        # report the accepted range and the offending value in the error itself
+        stop("lgb.plot.tree: Invalid tree number. Value of 'tree' should be between 1 and the maximum tree index in the model (", max(modelDT$tree_index), "). Got: ", tree, ".")
     }
     # filter modelDT to just the rows for the selected tree
     modelDT <- modelDT[tree_index == tree, ]
diff --git a/R-package/tests/testthat/test_lgb.plot.tree.R b/R-package/tests/testthat/test_lgb.plot.tree.R
index 54440b9825dd..857b61030544 100644
--- a/R-package/tests/testthat/test_lgb.plot.tree.R
+++ b/R-package/tests/testthat/test_lgb.plot.tree.R
@@ -64,24 +64,17 @@ models <- list(
 
 for (model_name in names(models)){
   model <- models[[model_name]]
-  expected_n_trees <- NROUNDS
-  if (model_name == "multi") {
-    expected_n_trees <- NROUNDS * NCLASS
-  }
-  df <- as.data.frame(lgb.model.dt.tree(model))
-  df_list <- split(df, f = df$tree_index, drop = TRUE)
-  df_leaf <- df[!is.na(df$leaf_index), ]
-  df_internal <- df[is.na(df$leaf_index), ]
+  modelDT <- lgb.model.dt.tree(model)
 
   test_that("lgb.plot.tree fails when a non existing tree is selected", {
     expect_error({
       lgb.plot.tree(model, 0)
-    }, regexp = "lgb.plot.tree: Value of 'tree' should be between 1 and the total number of trees in the model")
+    }, regexp = "lgb.plot.tree: Invalid tree number", fixed = TRUE)
   })
   test_that("lgb.plot.tree fails when a non existing tree is selected", {
     expect_error({
       lgb.plot.tree(model, 999)
-    }, regexp = "lgb.plot.tree: Value of 'tree' should be between 1 and the total number of trees in the model")
+    }, regexp = "lgb.plot.tree: Invalid tree number", fixed = TRUE)
   })
   test_that("lgb.plot.tree fails when a non numeric tree is selected", {
     expect_error({
@@ -96,7 +89,7 @@ for (model_name in names(models)){
   test_that("lgb.plot.tree fails when a non lgb.Booster model is passed", {
     expect_error({
       lgb.plot.tree(1, 0)
-    }, regexp = "lgb.plot.tree: model should be an 'lgb.Booster'")
+    }, regexp = paste0("lgb.plot.tree: model should be an ", sQuote("lgb.Booster")), fixed = TRUE)
   })
 }
 

From ed6244119239ef539d32829b9da099c1c7695911 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?F=C3=A9lix=20Boudry?= <felix.boudry@univ-perp.fr>
Date: Mon, 30 Dec 2024 10:47:56 +0100
Subject: [PATCH 7/8] Added DiagrammeR in CI, github and Azure workflows as
 well as in the README.

---
 .ci/test-r-package.sh                 | 2 +-
 .github/workflows/r_package.yml       | 4 ++--
 .github/workflows/static_analysis.yml | 2 +-
 .vsts-ci.yml                          | 2 +-
 R-package/README.md                   | 4 ++--
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/.ci/test-r-package.sh b/.ci/test-r-package.sh
index 2e414ec0d282..55d37e6dff03 100755
--- a/.ci/test-r-package.sh
+++ b/.ci/test-r-package.sh
@@ -114,7 +114,7 @@ Rscript --vanilla -e "install.packages('https://cran.r-project.org/src/contrib/A
 
 # Manually install Depends and Imports libraries + 'knitr', 'markdown', 'RhpcBLASctl', 'testthat'
 # to avoid a CI-time dependency on devtools (for devtools::install_deps())
-packages="c('data.table', 'jsonlite', 'knitr', 'markdown', 'R6', 'RhpcBLASctl', 'testthat')"
+packages="c('data.table', 'DiagrammeR', 'jsonlite', 'knitr', 'markdown', 'R6', 'RhpcBLASctl', 'testthat')"
 compile_from_source="both"
 if [[ $OS_NAME == "macos" ]]; then
     packages+=", type = 'binary'"
diff --git a/.github/workflows/r_package.yml b/.github/workflows/r_package.yml
index 66e05a18ba1f..c8506a414215 100644
--- a/.github/workflows/r_package.yml
+++ b/.github/workflows/r_package.yml
@@ -230,7 +230,7 @@ jobs:
       - name: Install packages
         shell: bash
         run: |
-          RDscript${{ matrix.r_customization }} -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl', 'testthat'), repos = 'https://cran.rstudio.com', Ncpus = parallel::detectCores())"
+          RDscript${{ matrix.r_customization }} -e "install.packages(c('R6', 'data.table', 'DiagrammeR', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl', 'testthat'), repos = 'https://cran.rstudio.com', Ncpus = parallel::detectCores())"
           sh build-cran-package.sh --r-executable=RD${{ matrix.r_customization }}
           RD${{ matrix.r_customization }} CMD INSTALL lightgbm_*.tar.gz || exit 1
       - name: Run tests with sanitizers
@@ -295,7 +295,7 @@ jobs:
       - name: Install packages and run tests
         shell: bash
         run: |
-          Rscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl'), repos = 'https://cran.rstudio.com', Ncpus = parallel::detectCores())"
+          Rscript -e "install.packages(c('R6', 'data.table', 'DiagrammeR', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl'), repos = 'https://cran.rstudio.com', Ncpus = parallel::detectCores())"
           sh build-cran-package.sh
 
           # 'rchk' isn't run through 'R CMD check', use the approach documented at
diff --git a/.github/workflows/static_analysis.yml b/.github/workflows/static_analysis.yml
index 34e573e0eea6..872ef9dbac14 100644
--- a/.github/workflows/static_analysis.yml
+++ b/.github/workflows/static_analysis.yml
@@ -64,7 +64,7 @@ jobs:
       - name: Install packages
         shell: bash
         run: |
-          Rscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl', 'roxygen2', 'testthat'), repos = 'https://cran.rstudio.com', Ncpus = parallel::detectCores())"
+          Rscript -e "install.packages(c('R6', 'data.table', 'DiagrammeR', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl', 'roxygen2', 'testthat'), repos = 'https://cran.rstudio.com', Ncpus = parallel::detectCores())"
           sh build-cran-package.sh || exit 1
           R CMD INSTALL --with-keep.source lightgbm_*.tar.gz || exit 1
       - name: Test documentation
diff --git a/.vsts-ci.yml b/.vsts-ci.yml
index 40424840c82d..6f99e37189cf 100644
--- a/.vsts-ci.yml
+++ b/.vsts-ci.yml
@@ -392,7 +392,7 @@ jobs:
       R_LIB_PATH=~/Rlib
       export R_LIBS=${R_LIB_PATH}
       mkdir -p ${R_LIB_PATH}
-      RDscript -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl'),  lib = '${R_LIB_PATH}', dependencies = c('Depends', 'Imports', 'LinkingTo'), repos = 'https://cran.rstudio.com', Ncpus = parallel::detectCores())" || exit 1
+      RDscript -e "install.packages(c('R6', 'data.table', 'DiagrammeR', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl'),  lib = '${R_LIB_PATH}', dependencies = c('Depends', 'Imports', 'LinkingTo'), repos = 'https://cran.rstudio.com', Ncpus = parallel::detectCores())" || exit 1
       sh build-cran-package.sh --r-executable=RD || exit 1
       mv lightgbm_${LGB_VER}.tar.gz $(Build.ArtifactStagingDirectory)/lightgbm-${LGB_VER}-r-cran.tar.gz
     displayName: 'Build CRAN R-package'
diff --git a/R-package/README.md b/R-package/README.md
index f1821f5cc6be..8900f5c5ccec 100644
--- a/R-package/README.md
+++ b/R-package/README.md
@@ -428,7 +428,7 @@ docker run \
 
 # install dependencies
 RDscript${R_CUSTOMIZATION} \
-  -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl', 'testthat'), repos = 'https://cran.r-project.org', Ncpus = parallel::detectCores())"
+  -e "install.packages(c('R6', 'data.table', 'DiagrammeR', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl', 'testthat'), repos = 'https://cran.r-project.org', Ncpus = parallel::detectCores())"
 
 # install lightgbm
 sh build-cran-package.sh --r-executable=RD${R_CUSTOMIZATION}
@@ -459,7 +459,7 @@ docker run \
     -it \
         wch1/r-debug
 
-RDscriptvalgrind -e "install.packages(c('R6', 'data.table', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl', 'testthat'), repos = 'https://cran.rstudio.com', Ncpus = parallel::detectCores())"
+RDscriptvalgrind -e "install.packages(c('R6', 'data.table', 'DiagrammeR', 'jsonlite', 'knitr', 'markdown', 'Matrix', 'RhpcBLASctl', 'testthat'), repos = 'https://cran.rstudio.com', Ncpus = parallel::detectCores())"
 
 sh build-cran-package.sh \
     --r-executable=RDvalgrind

From 2710705cef9bb4e0c6ec7489c9419ffe58d2922c Mon Sep 17 00:00:00 2001
From: James Lamb <jaylamb20@gmail.com>
Date: Tue, 31 Dec 2024 13:05:01 -0600
Subject: [PATCH 8/8] Update R-package/DESCRIPTION

---
 R-package/DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION
index 096265331d59..4f3730b25593 100755
--- a/R-package/DESCRIPTION
+++ b/R-package/DESCRIPTION
@@ -50,7 +50,7 @@ Suggests:
     markdown,
     processx,
     RhpcBLASctl,
-    testthat,
+    testthat
 Depends:
     R (>= 3.5)
 Imports: