From e62f98f7eccbdcc97e1596a3dca0f44e653467e4 Mon Sep 17 00:00:00 2001
From: deanmarchiori
Date: Tue, 16 Apr 2024 15:37:50 +1000
Subject: [PATCH] adding baseline id checker functions

---
 NAMESPACE                 |  2 +
 R/get_species_letter.R    | 24 ++++++++++
 R/id_checker.R            | 92 +++++++++++++++++++++++++++++++++++++++
 man/get_species_letter.Rd | 20 +++++++++
 man/id_checker.Rd         | 34 +++++++++++++++
 5 files changed, 172 insertions(+)
 create mode 100644 R/get_species_letter.R
 create mode 100644 R/id_checker.R
 create mode 100644 man/get_species_letter.Rd
 create mode 100644 man/id_checker.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 33f8394..7a68dfa 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -15,6 +15,8 @@ export(dropbox_upload)
 export(get_dropbox_val_logs)
 export(get_odk_form_schema)
 export(get_odk_responses)
+export(get_species_letter)
+export(id_checker)
 export(make_report_urls)
 export(othertext_lookup)
 export(read_excel_all_sheets)
diff --git a/R/get_species_letter.R b/R/get_species_letter.R
new file mode 100644
index 0000000..69a8bd5
--- /dev/null
+++ b/R/get_species_letter.R
@@ -0,0 +1,24 @@
+#' Get Species Letter
+#'
+#' This function maps the relationship between animal species and hum_anim_id codes.
+#' This is for use in id_checker()
+#'
+#' @param species character The species identifier. One of "human", "cattle", "small_mammal", "sheep" or "goat"
+#' @export
+#'
+#' @return character The hum_anim_id code
+get_species_letter <- function(species = c("human", "cattle", "small_mammal", "sheep", "goat")) {
+
+  if (missing(species)) {stop("species must not be missing. See get_species_letter() for options.")}
+
+  x <- match.arg(species)
+
+  switch(x,
+    "human" = "H",
+    "cattle" = "C",
+    "small_mammal" = "SM",
+    "sheep" = "S",
+    "goat" = "G"
+  )
+
+}
diff --git a/R/id_checker.R b/R/id_checker.R
new file mode 100644
index 0000000..1f44417
--- /dev/null
+++ b/R/id_checker.R
@@ -0,0 +1,92 @@
+#' ID Checker
+#'
+#' General function for checking and correcting ID columns.
+#'
+#' In order to use the autobot process for correcting ID columns, a new 'corrected'
+#' column is created by the user using the id_checker() function. It will take an
+#' existing vector of IDs, and an ID type (animal, hum_anim or site) and apply the
+#' bespoke corrections. This can then be consumed by the autobot log.
+#'
+#' @param col The vector of IDs to be checked
+#' @param type The ID type, see argument options for allowable settings
+#' @param ... other function arguments passed to get_species_letter (only used when type = "hum_anim")
+#'
+#' @return vector of corrected IDs, with NA for any ID that cannot be corrected automatically
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#' # species is only used (and is required) when type = "hum_anim"
+#' data |> mutate(hum_anim_id_new = id_checker(hum_anim_id, type = "hum_anim", species = "cattle"))
+#' data |> mutate(farm_id_new = id_checker(farm_id, type = "site"))
+#' }
+id_checker <- function(col, type = c("animal", "hum_anim", "site"), ...) {
+
+  if (missing(type)) {stop("type must not be missing")}
+
+  type <- match.arg(type, c("animal", "hum_anim", "site"))
+
+  # animal ID checks
+  if (type == "animal") {
+    # splits the ID at the hyphen and left pads the part after it to 4 characters with zeros
+    correction <- paste0(
+      stringr::str_extract(col, "^.+\\-"), # all characters from the start of the ID to the hyphen
+      stringr::str_pad(
+        stringr::str_extract(col, "(?<=\\-).+$"), # all characters from after the hyphen to the end
+        side = "left",
+        pad = "0",
+        width = 4
+      )
+    )
+    # post corrections this checks the ID is formatted correctly, else sets to NA for manual flagging.
+    # the trailing $ means an ID with more than four digits after the hyphen is flagged, not accepted
+    new <- ifelse(stringr::str_detect(correction, "^KZNRVF(22|23)(C|G|S)\\-\\d{4}$"), correction, NA)
+  }
+
+  # Human and Animal ID checks
+  if (type == "hum_anim") {
+
+    # should be upper case
+    c1 <- toupper(col)
+
+    # replace every O with 0 in the numeric section (the last four characters). This runs before
+    # the species check, otherwise a mistyped O is counted as part of the species code
+    c2 <- stringr::str_replace_all(c1, pattern = "O(?=.{0,3}$)", replacement = "0")
+
+    # species code should match pattern based on what type of animal
+    c3 <- ifelse(stringr::str_extract(c2, "^\\D+") == get_species_letter(...), c2, NA)
+
+    # left pad numeric part with 0
+    c4 <- stringr::str_replace(c3, pattern = "\\d+$", stringr::str_pad(stringr::str_extract(c3, "\\d+$"), side = "left", pad = "0", width = 4))
+
+    # Check anything that is "0000" manually
+    c5 <- ifelse(stringr::str_detect(c4, "0{4}$"), NA, c4)
+
+    # final format check
+    new <- ifelse(stringr::str_detect(c5, "^(H|C|S|G|SM)\\d{4}$"), c5, NA)
+
+  }
+
+  # SiteID checks
+  if (type == "site") {
+
+    # trim white space
+    c1 <- stringr::str_trim(col)
+
+    # to upper case
+    c2 <- toupper(c1)
+
+    # correct every O in the first numeric part (the first three characters)
+    c3 <- stringr::str_replace_all(c2, pattern = "(?<=^.{0,2})O", replacement = "0")
+
+    # numbers padded with zero to increase to 3 characters
+    c4 <- stringr::str_replace(c3, pattern = "^\\d+", stringr::str_pad(stringr::str_extract(c3, "^\\d+"), side = "left", pad = "0", width = 3))
+
+    # Format check
+    new <- ifelse(stringr::str_detect(c4, "^\\d{3}.{3}\\d{3}$"), c4, NA)
+  }
+
+  # NA marks an ID that failed its format check and needs manual review
+  new
+
+}
diff --git a/man/get_species_letter.Rd b/man/get_species_letter.Rd
new file mode 100644
index 0000000..73ac89b
--- /dev/null
+++ b/man/get_species_letter.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/get_species_letter.R
+\name{get_species_letter}
+\alias{get_species_letter}
+\title{Get Species Letter}
+\usage{
+get_species_letter(
+  species = c("human", "cattle", "small_mammal", "sheep", "goat")
+)
+}
+\arguments{
+\item{species}{character The species identifier. One of "human", "cattle", "small_mammal", "sheep" or "goat"}
+}
+\value{
+character The hum_anim_id code
+}
+\description{
+This function maps the relationship between animal species and hum_anim_id codes.
+This is for use in id_checker()
+}
diff --git a/man/id_checker.Rd b/man/id_checker.Rd
new file mode 100644
index 0000000..5773713
--- /dev/null
+++ b/man/id_checker.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/id_checker.R
+\name{id_checker}
+\alias{id_checker}
+\title{ID Checker}
+\usage{
+id_checker(col, type = c("animal", "hum_anim", "site"), ...)
+}
+\arguments{
+\item{col}{The vector of IDs to be checked}
+
+\item{type}{The ID type, see argument options for allowable settings}
+
+\item{...}{other function arguments passed to get_species_letter (only used when type = "hum_anim")}
+}
+\value{
+vector of corrected IDs, with NA for any ID that cannot be corrected automatically
+}
+\description{
+General function for checking and correcting ID columns.
+}
+\details{
+In order to use the autobot process for correcting ID columns, a new 'corrected'
+column is created by the user using the id_checker() function. It will take an
+existing vector of IDs, and an ID type (animal, hum_anim or site) and apply the
+bespoke corrections. This can then be consumed by the autobot log.
+}
+\examples{
+\dontrun{
+# species is only used (and is required) when type = "hum_anim"
+data |> mutate(hum_anim_id_new = id_checker(hum_anim_id, type = "hum_anim", species = "cattle"))
+data |> mutate(farm_id_new = id_checker(farm_id, type = "site"))
+}
+}