From 16843c1f0f2d641297897cfa81aaced878bfb37e Mon Sep 17 00:00:00 2001
From: n-kall <n-kall@github.com>
Date: Wed, 15 Nov 2023 13:27:08 +0200
Subject: [PATCH] add log_weights option to pareto functions

---
 R/pareto_smooth.R         | 40 +++++++++++++++++++++++++--------------
 man-roxygen/args-pareto.R |  6 +++++-
 man/pareto_diags.Rd       |  8 +++++++-
 man/pareto_khat.Rd        |  8 +++++++-
 man/pareto_smooth.Rd      |  8 +++++++-
 5 files changed, 52 insertions(+), 18 deletions(-)

diff --git a/R/pareto_smooth.R b/R/pareto_smooth.R
index baaa5833..27b6895a 100644
--- a/R/pareto_smooth.R
+++ b/R/pareto_smooth.R
@@ -29,6 +29,7 @@ pareto_khat.default <- function(x,
                                 r_eff = NULL,
                                 ndraws_tail = NULL,
                                 verbose = FALSE,
+                                log_weights = FALSE,
                                 ...) {
   smoothed <- pareto_smooth.default(
     x,
@@ -38,6 +39,7 @@ pareto_khat.default <- function(x,
     verbose = verbose,
     return_k = TRUE,
     smooth_draws = FALSE,
+    log_weights = log_weights,
     ...)
   return(smoothed$diagnostics)
 }
@@ -120,11 +122,12 @@ pareto_diags <- function(x, ...) UseMethod("pareto_diags")
 #' @rdname pareto_diags
 #' @export
 pareto_diags.default <- function(x,
-                                  tail = c("both", "right", "left"),
-                                  r_eff = NULL,
-                                  ndraws_tail = NULL,
-                                  verbose = FALSE,
-                                  ...) {
+                                 tail = c("both", "right", "left"),
+                                 r_eff = NULL,
+                                 ndraws_tail = NULL,
+                                 verbose = FALSE,
+                                 log_weights = FALSE,
+                                 ...) {
 
   smoothed <- pareto_smooth.default(
     x,
@@ -135,6 +138,7 @@ pareto_diags.default <- function(x,
     extra_diags = TRUE,
     verbose = verbose,
     smooth_draws = FALSE,
+    log_weights = FALSE,
     ...)
 
   return(smoothed$diagnostics)
@@ -234,8 +238,8 @@ pareto_smooth.rvar <- function(x, return_k = TRUE, extra_diags = FALSE, ...) {
       )
     }
     out <- list(
-    x = rvar(apply(draws_diags, margins, function(x) x[[1]]$x), nchains = nchains(x)),
-    diagnostics = diags
+      x = rvar(apply(draws_diags, margins, function(x) x[[1]]$x), nchains = nchains(x)),
+      diagnostics = diags
     )
   } else {
     out <- rvar(apply(draws_diags, margins, function(x) x[[1]]), nchains = nchains(x))
@@ -252,6 +256,7 @@ pareto_smooth.default <- function(x,
                                   return_k = TRUE,
                                   extra_diags = FALSE,
                                   verbose = FALSE,
+                                  log_weights = FALSE,
                                   ...) {
 
   checkmate::assert_number(ndraws_tail, null.ok = TRUE)
@@ -259,6 +264,7 @@ pareto_smooth.default <- function(x,
   checkmate::assert_logical(extra_diags)
   checkmate::assert_logical(return_k)
   checkmate::assert_logical(verbose)
+  checkmate::assert_logical(log_weights)
 
   # check for infinite or na values
   if (should_return_NA(x)) {
@@ -266,6 +272,10 @@ pareto_smooth.default <- function(x,
     return(list(x = x, diagnostics = NA_real_))
   }
 
+  if (log_weights) {
+    tail = "right"
+  }
+  
   tail <- match.arg(tail)
   S <- length(x)
 
@@ -299,6 +309,7 @@ pareto_smooth.default <- function(x,
       x,
       ndraws_tail = ndraws_tail,
       tail = "left",
+      log_weights = log_weights,
       ...
     )
     left_k <- smoothed$k
@@ -308,6 +319,7 @@ pareto_smooth.default <- function(x,
       x = smoothed$x,
       ndraws_tail = ndraws_tail,
       tail = "right",
+      log_weights = log_weights,
       ...
     )
     right_k <- smoothed$k
@@ -358,11 +370,11 @@ pareto_smooth.default <- function(x,
                                 ndraws_tail,
                                 smooth_draws = TRUE,
                                 tail = c("right", "left"),
-                                log = FALSE,
+                                log_weights = FALSE,
                                 ...
                                 ) {
 
-  if (log) {
+  if (log_weights) {
     # shift log values for safe exponentiation
     x <- x - max(x)
   }
@@ -395,7 +407,7 @@ pareto_smooth.default <- function(x,
       k <- NA
     } else {
       # save time not sorting since x already sorted
-      if (log) {
+      if (log_weights) {
         draws_tail <- exp(draws_tail)
         cutoff <- exp(cutoff)
       }
@@ -405,7 +417,7 @@ pareto_smooth.default <- function(x,
       if (is.finite(k) && smooth_draws) {
         p <- (seq_len(ndraws_tail) - 0.5) / ndraws_tail
         smoothed <- qgeneralized_pareto(p = p, mu = cutoff, k = k, sigma = sigma)
-        if (log) {
+        if (log_weights) {
           smoothed <- log(smoothed)
         }
       } else {
@@ -467,11 +479,11 @@ pareto_smooth.default <- function(x,
 #' @noRd
 ps_min_ss <- function(k, ...) {
   if (k < 1) {
-   out <- 10^(1 / (1 - max(0, k)))
+    out <- 10^(1 / (1 - max(0, k)))
   } else {
-   out <- Inf
+    out <- Inf
   }
-   out
+  out
 }
 
 
diff --git a/man-roxygen/args-pareto.R b/man-roxygen/args-pareto.R
index a406dbde..070ded54 100644
--- a/man-roxygen/args-pareto.R
+++ b/man-roxygen/args-pareto.R
@@ -11,10 +11,14 @@
 #' @param ndraws_tail (numeric) number of draws for the tail. If
 #'   `ndraws_tail` is not specified, it will be calculated as
 #'   ceiling(3 * sqrt(length(x) / r_eff)) if length(x) > 225 and
-#'   length(x) / 5 otherwise (see Appendix H in Vehtari et al. (2022)).
+#'   length(x) / 5 otherwise (see Appendix H in Vehtari et
+#'   al. (2022)).
 #' @param r_eff (numeric) relative effective sample size estimate. If
 #'   `r_eff` is omitted, it will be calculated assuming the draws are
 #'   from MCMC.
 #' @param verbose (logical) Should diagnostic messages be printed? If
 #'   `TRUE`, messages related to Pareto diagnostics will be
 #'   printed. Default is `FALSE`.
+#' @param log_weights (logical) Are the draws log weights? Default is
+#'   `FALSE`. If `TRUE` computation will take into account that the
+#'   draws are log weights, and only right tail will be smoothed.
diff --git a/man/pareto_diags.Rd b/man/pareto_diags.Rd
index 38e55b5a..b47cab5d 100644
--- a/man/pareto_diags.Rd
+++ b/man/pareto_diags.Rd
@@ -14,6 +14,7 @@ pareto_diags(x, ...)
   r_eff = NULL,
   ndraws_tail = NULL,
   verbose = FALSE,
+  log_weights = FALSE,
   ...
 )
 
@@ -45,11 +46,16 @@ from MCMC.}
 \item{ndraws_tail}{(numeric) number of draws for the tail. If
 \code{ndraws_tail} is not specified, it will be calculated as
 ceiling(3 * sqrt(length(x) / r_eff)) if length(x) > 225 and
-length(x) / 5 otherwise (see Appendix H in Vehtari et al. (2022)).}
+length(x) / 5 otherwise (see Appendix H in Vehtari et
+al. (2022)).}
 
 \item{verbose}{(logical) Should diagnostic messages be printed? If
 \code{TRUE}, messages related to Pareto diagnostics will be
 printed. Default is \code{FALSE}.}
+
+\item{log_weights}{(logical) Are the draws log weights? Default is
+\code{FALSE}. If \code{TRUE} computation will take into account that the
+draws are log weights, and only right tail will be smoothed.}
 }
 \value{
 List of Pareto smoothing diagnostics:
diff --git a/man/pareto_khat.Rd b/man/pareto_khat.Rd
index 1c0d0e92..9094fd14 100644
--- a/man/pareto_khat.Rd
+++ b/man/pareto_khat.Rd
@@ -14,6 +14,7 @@ pareto_khat(x, ...)
   r_eff = NULL,
   ndraws_tail = NULL,
   verbose = FALSE,
+  log_weights = FALSE,
   ...
 )
 
@@ -45,11 +46,16 @@ from MCMC.}
 \item{ndraws_tail}{(numeric) number of draws for the tail. If
 \code{ndraws_tail} is not specified, it will be calculated as
 ceiling(3 * sqrt(length(x) / r_eff)) if length(x) > 225 and
-length(x) / 5 otherwise (see Appendix H in Vehtari et al. (2022)).}
+length(x) / 5 otherwise (see Appendix H in Vehtari et
+al. (2022)).}
 
 \item{verbose}{(logical) Should diagnostic messages be printed? If
 \code{TRUE}, messages related to Pareto diagnostics will be
 printed. Default is \code{FALSE}.}
+
+\item{log_weights}{(logical) Are the draws log weights? Default is
+\code{FALSE}. If \code{TRUE} computation will take into account that the
+draws are log weights, and only right tail will be smoothed.}
 }
 \value{
 \code{khat} estimated Generalized Pareto Distribution shape parameter k
diff --git a/man/pareto_smooth.Rd b/man/pareto_smooth.Rd
index 4a62a547..cd335da0 100644
--- a/man/pareto_smooth.Rd
+++ b/man/pareto_smooth.Rd
@@ -18,6 +18,7 @@ pareto_smooth(x, ...)
   return_k = TRUE,
   extra_diags = FALSE,
   verbose = FALSE,
+  log_weights = FALSE,
   ...
 )
 }
@@ -56,11 +57,16 @@ from MCMC.}
 \item{ndraws_tail}{(numeric) number of draws for the tail. If
 \code{ndraws_tail} is not specified, it will be calculated as
 ceiling(3 * sqrt(length(x) / r_eff)) if length(x) > 225 and
-length(x) / 5 otherwise (see Appendix H in Vehtari et al. (2022)).}
+length(x) / 5 otherwise (see Appendix H in Vehtari et
+al. (2022)).}
 
 \item{verbose}{(logical) Should diagnostic messages be printed? If
 \code{TRUE}, messages related to Pareto diagnostics will be
 printed. Default is \code{FALSE}.}
+
+\item{log_weights}{(logical) Are the draws log weights? Default is
+\code{FALSE}. If \code{TRUE} computation will take into account that the
+draws are log weights, and only right tail will be smoothed.}
 }
 \value{
 Either a vector \code{x} of smoothed values or a named list