From 6ac9d79783048b1919c839955414b8dbe1fa6b3f Mon Sep 17 00:00:00 2001
From: Stephanie Reinders <reinders.stephanie@gmail.com>
Date: Tue, 26 Nov 2024 13:28:02 -0600
Subject: [PATCH] Changed `get_cluster_fill_counts()` to use `tidyselect`

---
 DESCRIPTION                    |  1 +
 R/cluster_format.R             | 45 +++++++++++++---------------------
 man/get_cluster_fill_counts.Rd | 14 +++++++----
 3 files changed, 27 insertions(+), 33 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index cf79788a..8017da30 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -33,6 +33,7 @@ Imports:
     rjags,
     stringr,
     tidyr,
+    tidyselect
 Suggests: 
     knitr,
     rmarkdown,
diff --git a/R/cluster_format.R b/R/cluster_format.R
index a3cf6e64..10638fd3 100644
--- a/R/cluster_format.R
+++ b/R/cluster_format.R
@@ -65,51 +65,40 @@ format_template_data <- function(template) {
 }
 
 
-#' get_cluster_fill_counts
+#' Get Cluster Fill Counts
 #'
 #' `get_cluster_fill_counts()` creates a data frame that shows the number of
 #' graphs in each cluster for each input document.
 #'
-#' @param df A data frame with columns `writer`, `doc`, and `cluster`. Each
-#'   row corresponding to a graph and lists the writer of that graph, the document
-#'   from which the graph was obtained, and the cluster to which that graph is assigned.
-#' @return A dataframe of cluster fill counts for each document in the input data frame.
-#' 
+#' @param df A data frame with columns `docname` and `cluster`. Each row
+#'   corresponds to a graph and lists the document from which the graph was
+#'   obtained and the cluster to which that graph is assigned. Optionally, the
+#'   data frame may also have `writer` and `doc` columns. If present, `writer`
+#'   lists the writer ID of each document and `doc` is an identifier to
+#'   distinguish between different documents from the same writer.
+#' @return A data frame of cluster fill counts for each document in the input
+#'   data frame.
+#'
 #' @examples
-#' writer <- c(rep(1, 20), rep(2, 20), rep(3, 20))
 #' docname <- c(rep('doc1',20), rep('doc2', 20), rep('doc3', 20))
+#' writer <- c(rep(1, 20), rep(2, 20), rep(3, 20))
 #' doc <- c(rep(1, 20), rep(2, 20), rep(3, 20))
 #' cluster <- sample(3, 60, replace=TRUE)
 #' df <- data.frame(docname, writer, doc, cluster)
 #' get_cluster_fill_counts(df)
-#' 
+#'
 #' @export
 #' @md
 get_cluster_fill_counts <- function(df) {
   docname <- writer <- doc <- cluster <- n <- NULL
   
-  if (('writer' %in% colnames(df)) && ('doc' %in% colnames(df))) {
-    # count number of graphs in each cluster for each writer
-    cluster_fill_counts <- df %>%
-      dplyr::group_by(docname, writer, doc, cluster) %>%
-      dplyr::summarise(n = dplyr::n()) %>%
-      dplyr::mutate(n = as.integer(n)) %>%
-      tidyr::pivot_wider(names_from = cluster, values_from = n, values_fill = 0)
-    
-    # sort columns
-    cols <- c(colnames(cluster_fill_counts[, c(1, 2, 3)]), sort(as.numeric(colnames(cluster_fill_counts[, -c(1, 2, 3)]))))
-    cluster_fill_counts <- cluster_fill_counts[, cols]
-  } else {
-    cluster_fill_counts <- df %>%
-      dplyr::group_by(docname, cluster) %>%
+  # count graphs per document and cluster; names_sort = TRUE keeps cluster columns in ascending order
+  cluster_fill_counts <- df %>%
+      dplyr::group_by(dplyr::pick(tidyselect::any_of(c("docname", "writer", "doc", "cluster")))) %>%
       dplyr::summarise(n = dplyr::n()) %>%
       dplyr::mutate(n = as.integer(n)) %>%
-      tidyr::pivot_wider(names_from = cluster, values_from = n, values_fill = 0)
-    
-    # sort columns
-    cols <- c(colnames(cluster_fill_counts[, c(1)]), sort(as.numeric(colnames(cluster_fill_counts[, -c(1)]))))
-    cluster_fill_counts <- cluster_fill_counts[, cols]
-  }
+      tidyr::pivot_wider(names_from = cluster, values_from = n, values_fill = 0, names_sort = TRUE) %>%
+      dplyr::select(tidyselect::any_of(c("docname", "writer", "doc")), tidyselect::everything())
 
   return(cluster_fill_counts)
 }
diff --git a/man/get_cluster_fill_counts.Rd b/man/get_cluster_fill_counts.Rd
index 1d15ea4a..203c6478 100644
--- a/man/get_cluster_fill_counts.Rd
+++ b/man/get_cluster_fill_counts.Rd
@@ -7,20 +7,24 @@
 get_cluster_fill_counts(df)
 }
 \arguments{
-\item{df}{A data frame with columns \code{writer}, \code{doc}, and \code{cluster}. Each
-row corresponding to a graph and lists the writer of that graph, the document
-from which the graph was obtained, and the cluster to which that graph is assigned.}
+\item{df}{A data frame with columns \code{docname} and \code{cluster}. Each row
+corresponds to a graph and lists the document from which the graph was
+obtained and the cluster to which that graph is assigned. Optionally, the
+data frame may also have \code{writer} and \code{doc} columns. If present, \code{writer}
+lists the writer ID of each document and \code{doc} is an identifier to
+distinguish between different documents from the same writer.}
 }
 \value{
-A dataframe of cluster fill counts for each document in the input data frame.
+A data frame of cluster fill counts for each document in the input
+data frame.
 }
 \description{
 \code{get_cluster_fill_counts()} creates a data frame that shows the number of
 graphs in each cluster for each input document.
 }
 \examples{
-writer <- c(rep(1, 20), rep(2, 20), rep(3, 20))
 docname <- c(rep('doc1',20), rep('doc2', 20), rep('doc3', 20))
+writer <- c(rep(1, 20), rep(2, 20), rep(3, 20))
 doc <- c(rep(1, 20), rep(2, 20), rep(3, 20))
 cluster <- sample(3, 60, replace=TRUE)
 df <- data.frame(docname, writer, doc, cluster)