diff --git a/NAMESPACE b/NAMESPACE index c35680d..a4e47b5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,6 +6,7 @@ export(compare_documents) export(compare_writer_profiles) export(get_cluster_fill_rates) export(get_distances) +export(get_rates_of_misleading_slrs) export(get_ref_scores) export(interpret_slr) export(plot_scores) diff --git a/R/slrs.R b/R/slrs.R index eafac5d..1544181 100644 --- a/R/slrs.R +++ b/R/slrs.R @@ -138,6 +138,51 @@ interpret_slr <- function(df) { return(x) } +#' Get Rates of Misleading Evidence for SLRs +#' +#' Calculate the rates of misleading evidence for score-based likelihood ratios +#' (SLRs) when the ground truth is known. +#' +#' @param df A data frame of SLRs from [`compare_writer_profiles`] with +#' `score_only = FALSE`. +#' @param threshold A number greater than zero that serves as a decision +#' threshold. If the ground truth for two documents is that they came from the +#' same writer and the SLR is less than the decision threshold, this is +#' misleading evidence that incorrectly supports the defense (false negative). +#' If the ground truth for two documents is that they came from different +#' writers and the SLR is greater than the decision threshold, this is +#' misleading evidence that incorrectly supports the prosecution (false +#' positive). +#' +#' @return A list +#' @export +#' +#' @examples +#' \donttest{ +#' comparisons <- compare_writer_profiles(test, score_only = FALSE) +#' get_rates_of_misleading_slrs(comparisons) +#' } +#' +get_rates_of_misleading_slrs <- function(df, threshold = 1) { + # Use across to prevent "no visible binding for global variable" + known_matches <- df |> + dplyr::filter(dplyr::across(c("ground_truth")) == "same writer") + known_non_matches <- df |> + dplyr::filter(dplyr::across(c("ground_truth")) == "different writer") + + # Use across to prevent "no visible binding for global variable" + fn <- known_matches |> + dplyr::filter(dplyr::across(c("slr")) < threshold) + fp <- known_non_matches |> + dplyr::filter(dplyr::across(c("slr")) > threshold) + + defense <- nrow(fn) / nrow(known_matches) + prosecution <- nrow(fp) / nrow(known_non_matches) + + return(list("incorrectly_favors_defense" = defense, "incorrectly_favors_prosecution" = prosecution)) + +} + # Internal Functions ------------------------------------------------------ diff --git a/man/get_rates_of_misleading_slrs.Rd b/man/get_rates_of_misleading_slrs.Rd new file mode 100644 index 0000000..d42aba2 --- /dev/null +++ b/man/get_rates_of_misleading_slrs.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/slrs.R +\name{get_rates_of_misleading_slrs} +\alias{get_rates_of_misleading_slrs} +\title{Get Rates of Misleading Evidence for SLRs} +\usage{ +get_rates_of_misleading_slrs(df, threshold = 1) +} +\arguments{ +\item{df}{A data frame of SLRs from \code{\link{compare_writer_profiles}} with +\code{score_only = FALSE}.} + +\item{threshold}{A number greater than zero that serves as a decision +threshold. If the ground truth for two documents is that they came from the +same writer and the SLR is less than the decision threshold, this is +misleading evidence that incorrectly supports the defense (false negative). +If the ground truth for two documents is that they came from different +writers and the SLR is greater than the decision threshold, this is +misleading evidence that incorrectly supports the prosecution (false +positive).} +} +\value{ +A list +} +\description{ +Calculate the rates of misleading evidence for score-based likelihood ratios +(SLRs) when the ground truth is known. +} +\examples{ +\donttest{ +comparisons <- compare_writer_profiles(test, score_only = FALSE) +get_rates_of_misleading_slrs(comparisons) +} + +}