Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Multiqc 1.2 #13

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/check-standard.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,16 @@ jobs:
steps:
- uses: actions/checkout@v2

- uses: r-lib/actions/setup-pandoc@v1
- uses: r-lib/actions/setup-pandoc@v2

- uses: r-lib/actions/setup-r@v1
- uses: r-lib/actions/setup-r@v2
with:
r-version: ${{ matrix.config.r }}
http-user-agent: ${{ matrix.config.http-user-agent }}
use-public-rspm: true

- uses: r-lib/actions/setup-r-dependencies@v1
- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: rcmdcheck

- uses: r-lib/actions/check-r-package@v1
- uses: r-lib/actions/check-r-package@v2
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ Suggests:
ggplot2,
HistDat
Config/testthat/edition: 3
RoxygenNote: 7.1.2
RoxygenNote: 7.3.1
Roxygen: list(markdown = TRUE)
VignetteBuilder: knitr
URL: https://multimeric.github.io/TidyMultiqc/, https://github.com/multimeric/TidyMultiqc, https://cran.r-project.org/package=TidyMultiqc
Expand Down
2 changes: 1 addition & 1 deletion R/internal_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ kv_map <- function(l, func, map_keys = FALSE) {
sanitise_column_name <- function(name) {
name %>%
stringr::str_replace_all(pattern = "[- ]", replacement = "_") %>% # Any dividing characters become underscores
stringr::str_remove_all(pattern = "[^\\w%_]") %>% # Any special characters bar underscore and % get deleted
stringr::str_remove_all(pattern = "[^\\w%_]") %>% # Any special characters except underscore and % get deleted
stringr::str_to_lower()
}

Expand Down
3 changes: 1 addition & 2 deletions R/multiqc.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,8 @@
#' instead refer to the documentation website at <https://multimeric.github.io/TidyMultiqc/>, which
#' provides more accessible documentation.
#' @importFrom magrittr `%>%`
#' @docType package
#' @name TidyMultiqc-package
NULL
"_PACKAGE"

# Make R CMD Check hush
utils::globalVariables(c(".", "metadata.sample_id"))
Expand Down
134 changes: 97 additions & 37 deletions R/plot_parsers.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,32 @@
#' Parses a list of xyline plot datasets
#' @param dataset A list which has the names "name" and "data", which
#' originate from a MultiQC plot
#' @return A list whose names are sample names. Each value is a data frame with one column.
#' @keywords internal
map_xy_line_datasets <- function(datasets, plot_name){
kv_map(datasets, function(dataset){
list(
key = dataset$name,
value = dataset$data %>%
purrr::map(~ tibble::tibble_row(x = .[[1]], y = .[[2]])) %>%
purrr::list_rbind() %>%
# Chop the multi-row data frame into one row
tidyr::nest(.key = stringr::str_c("plot", plot_name, sep = "."))
)
})
}

#' Determine if a plot is Plotly
#'
#' Plotly plots are generated by MultiQC 1.2 and above.
#' This matters because the plot data formats differ between these two formats.
#' @keywords internal
#' @param plot_data A list containing the top level data for a single plot.
#' @return A logical scalar. TRUE if the plot is a plotly plot, or FALSE if it's a HighCharts one.
is_plotly <- function(plot_data){
!is.null(plot_data$layout)
}

#' Takes the JSON dictionary for an xyline plot, and returns a named list of
#' data frames, one for each sample.
#' @keywords internal
Expand All @@ -12,22 +41,23 @@ parse_xyline_plot <- function(plot_data, name) {
# This only works on xyline plots
assertthat::assert_that(plot_data$plot_type == "xy_line")

plot_data$datasets %>%
purrr::map(function(dataset) {
# MultiQC >=1.2 plotly parser
if (is_plotly(plot_data)){
plot_data$datasets %>%
purrr::map(function(dataset){
dataset$lines %>%
map_xy_line_datasets(plot_name = name)
}) %>%
purrr::list_flatten()
}

# MultiQC <=1.1 highcharts parser
else {
plot_data$datasets %>%
# For some reason there are two levels of nesting here
dataset %>%
kv_map(function(subdataset) {
name <- stringr::str_c("plot", name, sep = ".")
list(
key = subdataset$name,
value = subdataset$data %>%
purrr::map_dfr(~ list(x = .[[1]], y = .[[2]])) %>%
# Chop the multi-row data frame into one row
tidyr::nest({{ name }} := tidyr::everything()) # %>%
)
})
}) %>%
purrr::reduce(~ purrr::list_merge(.x, !!!.y))
purrr::map(map_xy_line_datasets, plot_name = name) %>%
purrr::list_flatten()
}
}

#' Takes the JSON dictionary for a bar graph, and returns a named list of
Expand All @@ -41,30 +71,60 @@ parse_xyline_plot <- function(plot_data, name) {
#' one column for the number of intron variants, one column for the number of exon variants, etc.
#' This means that the number of columns will be fairly variable for different plots.
parse_bar_graph <- function(plot_data, name) {
# This only works on bar_graphs
assertthat::assert_that(plot_data$plot_type == "bar_graph")

# Make a list of samples
samples <- plot_data$samples[[1]] %>% purrr::flatten_chr()
plot_data$datasets %>% length() %>% `==`(1) %>% assertthat::assert_that(msg = "Only bar graphs with 1 dataset are understood by this parser!")

colname <- stringr::str_c("plot", sanitise_column_name(name), sep = ".")

plot_data$datasets[[1]] %>%
# First, build up a dictionary of samples -> dictionary of quality metrics
purrr::map(function(dataset) {
segment_name <- dataset$name
dataset$data %>%
# For this segment, each sample has a value
kv_map(function(value, idx) {
list(
key = samples[[idx]],
value = list(value) %>% purrr::set_names(sanitise_column_name(segment_name))
)
}, map_keys = TRUE)
}) %>%
purrr::reduce(utils::modifyList) %>%
# Then, convert each inner dictionary to a tibble row
purrr::map(tibble::as_tibble_row) %>%
# And nest each df so that we only have 1 cell of output per sample
purrr::map(~ tidyr::nest(., {{ colname }} := tidyr::everything()))
if (is_plotly(plot_data)){
# MultiQC 1.2+
dataset <- plot_data$datasets[[1]]
samples <- dataset$samples %>% purrr::flatten_chr()
# We make a data frame whose rows are samples and whose columns are categories
# Ideally this would be the final output, but currently the other code
# expects a list of samples
df <- dataset$cats %>%
purrr::map(function(cat){
tibble::as_tibble_col(
purrr::flatten_dbl(cat$data),
column_name = sanitise_column_name(cat$name)
)
}) %>%
purrr::list_cbind()

# For compatibility with the old format
if ("unknown" %in% colnames(df)){
df <- dplyr::rename(df, none = unknown)
}

# And then we slice out each row to become its own list
seq_along(samples) %>%
purrr::map(function(sample_idx){
df[sample_idx, ] %>% tidyr::nest(.key = colname)
}) %>%
purrr::set_names(samples) %>%
`[`(sort(samples))
}
else {
# Make a list of samples
samples <- plot_data$samples[[1]] %>% purrr::flatten_chr()
plot_data$datasets[[1]] %>%
# First, build up a dictionary of samples -> dictionary of quality metrics
purrr::map(function(dataset) {
segment_name <- dataset$name
dataset$data %>%
# For this segment, each sample has a value
kv_map(function(value, idx) {
list(
key = samples[[idx]],
value = list(value) %>% purrr::set_names(sanitise_column_name(segment_name))
)
}, map_keys = TRUE)
}) %>%
purrr::reduce(utils::modifyList) %>%
# Then, convert each inner dictionary to a tibble row
purrr::map(tibble::as_tibble_row) %>%
# And nest each df so that we only have 1 cell of output per sample
purrr::map(~ tidyr::nest(., .key = colname))
}
}
Loading
Loading