diff --git a/DESCRIPTION b/DESCRIPTION index bf32fd3..6093d3b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: ohcleandat Type: Package Title: One Health Data Cleaning and Quality Checking Package -Version: 0.3.8 +Version: 0.3.9 Authors@R: c( person("Collin", "Schwantes", email = "schwantes@ecohealthalliance.org", role = c("cre", "aut"), comment = c(ORCID = "0000-0003-4014-4896")), person("Johana", "Teigen", email = "teigen@ecohealthalliance.org", role = "aut", comment = c(ORCID = "0000-0002-6209-2321")), @@ -24,6 +24,7 @@ Imports: containerTemplateUtils (>= 0.0.0.9006), dplyr, frictionless, + fs, googledrive, googlesheets4, here, diff --git a/NAMESPACE b/NAMESPACE index 2191136..821e2ce 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,6 +23,7 @@ export(get_species_letter) export(guess_col_type) export(id_checker) export(make_report_urls) +export(make_zip_path) export(obfuscate_gps) export(obfuscate_lat) export(obfuscate_lon) diff --git a/NEWS.md b/NEWS.md index 29876bd..5a553f8 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,7 @@ +# ohcleandat 0.3.9 + +* Files over 300mb are zipped before attempting to upload them to dropbox. Zipped validation logs on dropbox are automatically unzipped. + # ohcleandat 0.3.8 * Fixing bug in bug fix - naming properties that will be updated diff --git a/R/dropbox_upload.R b/R/dropbox_upload.R index e4cee4c..5fa5217 100644 --- a/R/dropbox_upload.R +++ b/R/dropbox_upload.R @@ -1,14 +1,16 @@ #' Dropbox Upload #' -#' Upload a local file to dropbox and handle authentication. +#' Upload a local file to dropbox and handle authentication. Automatically +#' zips files over 300mb by default. #' #' @details #' This is a wrapper of `rdrop2::drop_upload()` which first reads in a local #' CSV file and then uploads to a DropBox path. #' -#' @param log Validation Log for OH cleaning pipelines -#' @param file_path local file path for upload -#' @param dropbox_path relative dropbox path +#' @param log dataframe. 
#' Dropbox Upload
#'
#' Write tabular data to a local CSV file and upload that file to dropbox.
#' Files over 300mb are zipped before upload by default.
#'
#' @details
#' The data are written to `file_path` with `readr::write_csv()`. When
#' `compress` is `TRUE` and the written file is larger than 300mb (measured as
#' bytes / 10^6), the file is zipped with `utils::zip()` to the path returned
#' by `make_zip_path()` and the archive is uploaded instead of the CSV. The
#' upload itself is done by `rdrop2::drop_upload()`.
#'
#' @param log dataframe. Validation Log for OH cleaning pipelines. Will work
#'   with any tabular data.
#' @param file_path character. local file path for upload
#' @param dropbox_path character. relative dropbox path
#' @param compress logical. Should files over 300mb be compressed? Must be a
#'   single `TRUE` or `FALSE`.
#'
#' @return The value returned by `rdrop2::drop_upload()`. Called for the side
#'   effects of writing `file_path` (and possibly a zip archive next to it) and
#'   uploading to dropbox.
#' @export
#'
dropbox_upload <- function(log, file_path, dropbox_path, compress = TRUE) {
  # fail early with a clear message instead of a cryptic error inside `if`
  stopifnot(
    is.logical(compress),
    length(compress) == 1L,
    !is.na(compress)
  )

  # files larger than this many megabytes (10^6 bytes) are zipped
  size_limit_mb <- 300

  # write the data to disk; the value returned by write_csv is not needed
  readr::write_csv(log, file_path)

  # by default the csv itself is uploaded
  file_to_upload <- file_path

  file_size_mb <- file.size(file_path) / 10^6

  if (compress && isTRUE(file_size_mb > size_limit_mb)) {
    zip_path <- make_zip_path(file_path)
    zip_status <- utils::zip(zipfile = zip_path, files = file_path)

    # utils::zip() returns the exit status of the external zip program
    # invisibly; without this check a failed zip would go on to upload a
    # missing or outdated archive
    if (!identical(as.integer(zip_status), 0L)) {
      stop(
        sprintf(
          "Failed to zip '%s' (zip exit status %s).",
          file_path, zip_status
        ),
        call. = FALSE
      )
    }

    file_to_upload <- zip_path
  }

  # upload
  rdrop2::drop_upload(file_to_upload, dropbox_path)
}
#' @param path_name character the default drop box path #' @@ -29,7 +33,11 @@ get_dropbox_val_logs <- # check file exists - it wont on first push if (!rdrop2::drop_exists(full_path_name)) { - return(NULL) + # check for zip version + full_path_name <- make_zip_path(full_path_name) + if(!rdrop2::drop_exists(full_path_name)){ + return(NULL) + } } # download file from drop box @@ -42,6 +50,12 @@ get_dropbox_val_logs <- # reading in the log, detecting with excel or csv local_path <- sprintf("%s/%s", "dropbox_validations", file_name) + # unzip if zipped + if (stringr::str_detect(full_path_name, ".zip")) { + local_zip_path <- make_zip_path(local_path) + utils::unzip(zipfile = local_zip_path,files = here::here(local_path)) + } + if (stringr::str_detect(file_name, ".xls|.xlsx")) { df <- readxl::read_xlsx(here::here(local_path)) } @@ -53,6 +67,8 @@ get_dropbox_val_logs <- na = character()) } + + # this ensures the log is ordered correctly before cleaning operations in case the user # has sorted the data before upload. Order is important so changes are processed sequentially. df_out <- df |> diff --git a/R/make_zip_path.R b/R/make_zip_path.R new file mode 100644 index 0000000..9a17b99 --- /dev/null +++ b/R/make_zip_path.R @@ -0,0 +1,24 @@ +#' Get make a zip file path +#' +#' Take a file path, remove the extension, replace the extension +#' with .zip +#' +#' @param file_path character. +#' +#' @return character. 
#' Build a zip file path
#'
#' Swap the extension of a file path for `.zip`: the existing extension is
#' removed with `fs::path_ext_remove()` and `.zip` is appended.
#'
#' @param file_path character. File path whose extension should be replaced.
#'
#' @return character. `file_path` with its extension replaced by `.zip`.
#' @export
#'
#' @examples
#'
#' make_zip_path("hello.csv")
#'
#' make_zip_path("foo/bar/hello.csv")
#'
make_zip_path <- function(file_path) {
  sprintf("%s.zip", fs::path_ext_remove(file_path))
}
This file +will be automatically unzipped on download so provide the file extension for +the uncompressed file, not the zipped file. E.g. "val_log.csv" even if on +dropbox it is stored as "val_log.zip".} \item{folder}{character the folder the log is saved in on drop box. Can be NULL if not in subfolder.} diff --git a/man/make_zip_path.Rd b/man/make_zip_path.Rd new file mode 100644 index 0000000..b6a1942 --- /dev/null +++ b/man/make_zip_path.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/make_zip_path.R +\name{make_zip_path} +\alias{make_zip_path} +\title{Get make a zip file path} +\usage{ +make_zip_path(file_path) +} +\arguments{ +\item{file_path}{character.} +} +\value{ +character. String where extension is replaced by zip +} +\description{ +Take a file path, remove the extension, replace the extension +with .zip +} +\examples{ + +file_path <- "hello.csv" +make_zip_path(file_path) + +file_path_with_dir <- "foo/bar/hello.csv" +make_zip_path(file_path_with_dir) + +}