multimeric · multimeric · Apr 1, 2024 · Apr 1, 2024 · Apr 1, 2024 · Apr 1, 2024
diff --git a/.github/workflows/check-standard.yaml b/.github/workflows/check-standard.yaml
@@ -31,16 +31,16 @@ jobs:
     steps:
       - uses: actions/checkout@v2
 
-      - uses: r-lib/actions/setup-pandoc@v1
+      - uses: r-lib/actions/setup-pandoc@v2
 
-      - uses: r-lib/actions/setup-r@v1
+      - uses: r-lib/actions/setup-r@v2
         with:
           r-version: ${{ matrix.config.r }}
           http-user-agent: ${{ matrix.config.http-user-agent }}
           use-public-rspm: true
 
-      - uses: r-lib/actions/setup-r-dependencies@v1
+      - uses: r-lib/actions/setup-r-dependencies@v2
         with:
           extra-packages: rcmdcheck
 
-      - uses: r-lib/actions/check-r-package@v1
+      - uses: r-lib/actions/check-r-package@v2
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -28,7 +28,7 @@ Suggests:
     ggplot2,
     HistDat
 Config/testthat/edition: 3
-RoxygenNote: 7.1.2
+RoxygenNote: 7.3.1
 Roxygen: list(markdown = TRUE)
 VignetteBuilder: knitr
 URL: https://multimeric.github.io/TidyMultiqc/, https://github.com/multimeric/TidyMultiqc, https://cran.r-project.org/package=TidyMultiqc

diff --git a/R/internal_utils.R b/R/internal_utils.R
@@ -30,7 +30,7 @@ kv_map <- function(l, func, map_keys = FALSE) {
 sanitise_column_name <- function(name) {
   name %>%
     stringr::str_replace_all(pattern = "[- ]", replacement = "_") %>% # Any dividing characters become underscores
-    stringr::str_remove_all(pattern = "[^\\w%_]") %>% # Any special characters bar underscore and % get deleted
+    stringr::str_remove_all(pattern = "[^\\w%_]") %>% # Any special characters except underscore and % get deleted
     stringr::str_to_lower()
 }
 

diff --git a/R/multiqc.R b/R/multiqc.R
@@ -6,9 +6,8 @@
 #' instead refer to the documentation website at <https://multimeric.github.io/TidyMultiqc/>, which
 #' provides more accessible documentation.
 #' @importFrom magrittr `%>%`
-#' @docType package
 #' @name TidyMultiqc-package
-NULL
+"_PACKAGE"
 
 # Make R CMD Check hush
 utils::globalVariables(c(".", "metadata.sample_id"))

diff --git a/R/plot_parsers.R b/R/plot_parsers.R
@@ -1,3 +1,32 @@
+#' Parses a list of xyline plot datasets
+#' @param dataset A list which has the names "name" and "data", which
+#'  originate from a MultiQC plot
+#' @return A list whose names are sample names. Each value is a data frame with one column.
+#' @keywords internal
+map_xy_line_datasets <- function(datasets, plot_name){
+  kv_map(datasets, function(dataset){
+    list(
+      key = dataset$name,
+      value = dataset$data %>%
+        purrr::map(~ tibble::tibble_row(x = .[[1]], y = .[[2]])) %>%
+        purrr::list_rbind() %>%
+        # Chop the multi-row data frame into one row
+        tidyr::nest(.key = stringr::str_c("plot", plot_name, sep = "."))
+    )
+  })
+}
+
+#' Determine if a plot is Plotly
+#'
+#' Plotly plots are generated by MultiQC 1.2 and above.
+#' This matters because the plot data formats differ between these two formats.
+#' @keywords internal
+#' @param plot_data A list containing the top level data for a single plot.
+#' @return A logical scalar. TRUE if the plot is a plotly plot, or FALSE if it's a HighCharts one.
+is_plotly <- function(plot_data){
+  !is.null(plot_data$layout)
+}
+
 #' Takes the JSON dictionary for an xyline plot, and returns a named list of
 #' data frames, one for each sample.
 #' @keywords internal
@@ -12,22 +41,23 @@ parse_xyline_plot <- function(plot_data, name) {
   # This only works on xyline plots
   assertthat::assert_that(plot_data$plot_type == "xy_line")
 
-  plot_data$datasets %>%
-    purrr::map(function(dataset) {
+  # MultiQC >=1.2 plotly parser
+  if (is_plotly(plot_data)){
+    plot_data$datasets %>%
+      purrr::map(function(dataset){
+        dataset$lines %>%
+          map_xy_line_datasets(plot_name = name)
+      }) %>%
+      purrr::list_flatten()
+  }
+
+  # MultiQC <=1.1 highcharts parser
+  else {
+    plot_data$datasets %>%
       # For some reason there are two levels of nesting here
-      dataset %>%
-        kv_map(function(subdataset) {
-          name <- stringr::str_c("plot", name, sep = ".")
-          list(
-            key = subdataset$name,
-            value = subdataset$data %>%
-              purrr::map_dfr(~ list(x = .[[1]], y = .[[2]])) %>%
-              # Chop the multi-row data frame into one row
-              tidyr::nest({{ name }} := tidyr::everything()) # %>%
-          )
-        })
-    }) %>%
-    purrr::reduce(~ purrr::list_merge(.x, !!!.y))
+      purrr::map(map_xy_line_datasets, plot_name = name) %>%
+      purrr::list_flatten()
+  }
 }
 
 #' Takes the JSON dictionary for a bar graph, and returns a named list of
@@ -41,30 +71,60 @@ parse_xyline_plot <- function(plot_data, name) {
 #' one column for the number of intron variants, one column for the number of exon variants, etc.
 #' This means that the number of columns will be fairly variable for different plots.
 parse_bar_graph <- function(plot_data, name) {
-  # This only works on bar_graphs
   assertthat::assert_that(plot_data$plot_type == "bar_graph")
-
-  # Make a list of samples
-  samples <- plot_data$samples[[1]] %>% purrr::flatten_chr()
+  plot_data$datasets %>% length() %>% `==`(1) %>% assertthat::assert_that(msg = "Only bar graphs with 1 dataset are understood by this parser!")
 
   colname <- stringr::str_c("plot", sanitise_column_name(name), sep = ".")
 
-  plot_data$datasets[[1]] %>%
-    # First, build up a dictionary of samples -> dictionary of quality metrics
-    purrr::map(function(dataset) {
-      segment_name <- dataset$name
-      dataset$data %>%
-        # For this segment, each sample has a value
-        kv_map(function(value, idx) {
-          list(
-            key = samples[[idx]],
-            value = list(value) %>% purrr::set_names(sanitise_column_name(segment_name))
-          )
-        }, map_keys = TRUE)
-    }) %>%
-    purrr::reduce(utils::modifyList) %>%
-    # Then, convert each inner dictionary to a tibble row
-    purrr::map(tibble::as_tibble_row) %>%
-    # And nest each df so that we only have 1 cell of output per sample
-    purrr::map(~ tidyr::nest(., {{ colname }} := tidyr::everything()))
+  if (is_plotly(plot_data)){
+    # MultiQC 1.2+
+    dataset <- plot_data$datasets[[1]]
+    samples <- dataset$samples %>% purrr::flatten_chr()
+    # We make a data frame whose rows are samples and whose columns are categories
+    # Ideally this would be the final output, but currently the other code
+    # expects a list of samples
+    df <- dataset$cats %>%
+      purrr::map(function(cat){
+        tibble::as_tibble_col(
+          purrr::flatten_dbl(cat$data),
+          column_name = sanitise_column_name(cat$name)
+        )
+      }) %>%
+      purrr::list_cbind()
+
+    # For compatibility with the old format
+    if ("unknown" %in% colnames(df)){
+      df <- dplyr::rename(df, none = unknown)
+    }
+
+    # And then we slice out each row to become its own list
+    seq_along(samples) %>%
+      purrr::map(function(sample_idx){
+        df[sample_idx, ] %>% tidyr::nest(.key = colname)
+      }) %>%
+      purrr::set_names(samples) %>%
+      `[`(sort(samples))
+  }
+  else {
+    # Make a list of samples
+    samples <- plot_data$samples[[1]] %>% purrr::flatten_chr()
+    plot_data$datasets[[1]] %>%
+      # First, build up a dictionary of samples -> dictionary of quality metrics
+      purrr::map(function(dataset) {
+        segment_name <- dataset$name
+        dataset$data %>%
+          # For this segment, each sample has a value
+          kv_map(function(value, idx) {
+            list(
+              key = samples[[idx]],
+              value = list(value) %>% purrr::set_names(sanitise_column_name(segment_name))
+            )
+          }, map_keys = TRUE)
+      }) %>%
+      purrr::reduce(utils::modifyList) %>%
+      # Then, convert each inner dictionary to a tibble row
+      purrr::map(tibble::as_tibble_row) %>%
+      # And nest each df so that we only have 1 cell of output per sample
+      purrr::map(~ tidyr::nest(., .key = colname))
+  }
 }