Skip to content

Commit

Permalink
Changed get_cluster_fill_counts() to use tidyselect
Browse files Browse the repository at this point in the history
  • Loading branch information
stephaniereinders committed Nov 26, 2024
1 parent e0f91ea commit 6ac9d79
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 33 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ Imports:
rjags,
stringr,
tidyr,
tidyselect
Suggests:
knitr,
rmarkdown,
Expand Down
45 changes: 17 additions & 28 deletions R/cluster_format.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,51 +65,40 @@ format_template_data <- function(template) {
}


#' Get Cluster Fill Counts
#'
#' `get_cluster_fill_counts()` creates a data frame that shows the number of
#' graphs in each cluster for each input document.
#'
#' @param df A data frame with columns `docname` and `cluster`. Each row
#'   corresponds to a graph and lists the document from which the graph was
#'   obtained and the cluster to which that graph is assigned. Optionally, the
#'   data frame may also have `writer` and `doc` columns. If present, `writer`
#'   lists the writer ID of each document and `doc` is an identifier to
#'   distinguish between different documents from the same writer.
#' @return A data frame of cluster fill counts with one row per document in
#'   the input data frame. The identifier columns that are present in `df`
#'   (`docname`, `writer`, `doc`) come first, followed by one integer count
#'   column per cluster, ordered by cluster value. Clusters with no graphs in
#'   a document are filled with 0.
#'
#' @examples
#' docname <- c(rep('doc1',20), rep('doc2', 20), rep('doc3', 20))
#' writer <- c(rep(1, 20), rep(2, 20), rep(3, 20))
#' doc <- c(rep(1, 20), rep(2, 20), rep(3, 20))
#' cluster <- sample(3, 60, replace=TRUE)
#' df <- data.frame(docname, writer, doc, cluster)
#' get_cluster_fill_counts(df)
#'
#' @export
#' @md
get_cluster_fill_counts <- function(df) {
  # `cluster` and `n` are referenced as bare column names in pivot_wider();
  # binding them here presumably silences R CMD check "no visible binding"
  # notes.
  cluster <- n <- NULL

  # any_of() silently skips absent columns, so a missing `cluster` column
  # would otherwise surface as an obscure error inside pivot_wider().
  if (!("cluster" %in% colnames(df))) {
    stop("`df` must contain a `cluster` column.", call. = FALSE)
  }

  id_cols <- c("docname", "writer", "doc")

  # Count the graphs in each cluster for each document, then spread the
  # clusters into columns. `writer` and `doc` take part in the grouping only
  # when they are present in `df`.
  df %>%
    dplyr::group_by(
      dplyr::pick(tidyselect::any_of(c(id_cols, "cluster")))
    ) %>%
    # "drop_last" is summarise()'s default for this case; stating it keeps the
    # same grouping on the result and suppresses the informational message.
    dplyr::summarise(n = as.integer(dplyr::n()), .groups = "drop_last") %>%
    tidyr::pivot_wider(
      names_from = cluster,
      values_from = n,
      values_fill = 0,
      # Without this, cluster columns appear in order of first appearance,
      # which depends on which clusters the first document happens to use.
      names_sort = TRUE
    ) %>%
    dplyr::select(tidyselect::any_of(id_cols), tidyselect::everything())
}
Expand Down
14 changes: 9 additions & 5 deletions man/get_cluster_fill_counts.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 6ac9d79

Please sign in to comment.