From d649ad8424489fc1add0b70b9cccd8e43f6029f2 Mon Sep 17 00:00:00 2001 From: Ben Raymond Date: Thu, 1 Feb 2024 19:29:49 +1100 Subject: [PATCH] add handler for new Copernicus Marine system --- DESCRIPTION | 1 + NAMESPACE | 1 + R/copernicus_handler.R | 104 +++++++++++++++++++++++++++++++++++ R/utils.R | 11 ++-- man/bb_handler_copernicus.Rd | 25 +++++++++ 5 files changed, 135 insertions(+), 7 deletions(-) create mode 100644 R/copernicus_handler.R create mode 100644 man/bb_handler_copernicus.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 5796c1e..e42b771 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -16,6 +16,7 @@ Imports: archive, assertthat, aws.s3 (>= 0.3.21), + CopernicusMarine, curl, fs, httr, diff --git a/NAMESPACE b/NAMESPACE index e0fc642..f786004 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -18,6 +18,7 @@ export(bb_fingerprint) export(bb_get) export(bb_gunzip) export(bb_handler_aws_s3) +export(bb_handler_copernicus) export(bb_handler_earthdata) export(bb_handler_oceandata) export(bb_handler_rget) diff --git a/R/copernicus_handler.R b/R/copernicus_handler.R new file mode 100644 index 0000000..0066293 --- /dev/null +++ b/R/copernicus_handler.R @@ -0,0 +1,104 @@ +#' Handler for Copernicus Marine datasets +#' +#' This is a handler function to be used with data sets from Copernicus Marine. This function is not intended to be called directly, but rather is specified as a \code{method} option in \code{\link{bb_source}}. +#' +#' Note that users will need a Copernicus login. +#' +#' @references https://help.marine.copernicus.eu/en/collections/4060068-copernicus-marine-toolbox +#' @param product string: the desired Copernicus marine product. See \code{\link[CopernicusMarine]{cms_products_list}} +# @param layer string: (optional) the layer within a product. See \code{\link[CopernicusMarine]{cms_product_details}} +#' @param ... : parameters passed to \code{\link{bb_rget}} +#' +#' @return TRUE on success +#' +#' @export +bb_handler_copernicus <- function(product, ...) { ##layer, + assert_that(is.string(product), nzchar(product)) + ##if (!missing(layer)) { + ## if (!is.null(layer)) assert_that(is.string(layer), nzchar(layer)) + ##} else { + ## layer <- NULL + ##} + do.call(bb_handler_copernicus_inner, list(..., product = product))##, layer = layer)) +} + + +# @param config bb_config: a bowerbird configuration (as returned by \code{bb_config}) with a single data source +# @param verbose logical: if TRUE, provide additional progress output +# @param local_dir_only logical: if TRUE, just return the local directory into which files from this data source would be saved +bb_handler_copernicus_inner <- function(config, verbose = FALSE, local_dir_only = FALSE, product, ...) { ## layer = NULL + assert_that(is(config, "bb_config")) + assert_that(nrow(bb_data_sources(config)) == 1) + assert_that(is.flag(verbose), !is.na(verbose)) + assert_that(is.flag(local_dir_only), !is.na(local_dir_only)) + assert_that(is.string(product), nzchar(product)) + ##if (!is.null(layer)) assert_that(is.string(layer), nzchar(layer)) + dots <- list(...) + + thisds <- bb_data_sources(config) ## user and password info will be in here + this_att <- bb_settings(config) + + + if (local_dir_only) { + ## actual source URLs look like e.g. https://s3.waw3-1.cloudferro.com/mdl-native-07/native/SEALEVEL_GLO_PHY_L4_NRT_008_046/cmems_obs-sl_glo_phy-ssh_nrt_allsat-l4-duacs-0.25deg_P1D_202311/2022/01/nrt_global_allsat_phy_l4_20220101_20220107.nc + ## but it seems plausible that the server/bucket might change over time + ## so we will store locally under data.marine.copernicus.eu// + return(file.path(this_att$local_file_root, "data.marine.copernicus.eu", product)) + } + +## if (is.null(thisds$user) || is.null(thisds$password) || na_or_empty(thisds$user) || na_or_empty(thisds$password)) stop(sprintf("Copernicus sources require a login: provide your user and password in the source configuration")) + if (verbose) cat("Downloading file list ... \n") + myfiles <- CopernicusMarine::cms_list_stac_files(product) ## layer is ignored in this? + myfiles$url <- paste0("https://", file.path(myfiles$home, myfiles$native, myfiles$current_path)) + ## take the `current_path` of each file and find the product ID + pidx <- stringr::str_locate(myfiles$current_path, stringr::fixed(paste0(product, "/"))) + ## use the section of `current_path` from the product ID onwards as the local directory under "data.marine.copernicus.eu" + myfiles$local_filename <- file.path("data.marine.copernicus.eu", vapply(seq_len(nrow(myfiles)), function(i) if (!is.na(pidx[i, 1])) substr(myfiles$current_path[i], pidx[i, 1], 9999) else myfiles$current_path[i], FUN.VALUE = "", USE.NAMES = FALSE)) ## where local copy will go + myfiles$was_downloaded <- FALSE + if (verbose) cat("\n", nrow(myfiles), " file", if (nrow(myfiles) > 1) "s", " to download\n", sep = "") + ## for each file, download if needed and store in appropriate directory + ok <- TRUE + my_curl_config <- build_curl_config(debug = FALSE, show_progress = verbose) ##, user = thisds$user, password = thisds$password, enforce_basic_auth = TRUE) + fidx <- file.exists(myfiles$local_filename) + myfiles$existing_checksum <- NA_character_ + myfiles$existing_checksum[fidx] <- vapply(myfiles$local_filename[fidx], file_hash, hash = "md5", FUN.VALUE = "", USE.NAMES = FALSE) + for (idx in seq_len(nrow(myfiles))) { + this_url <- myfiles$url[idx] + this_fullfile <- myfiles$local_filename[idx] + if (!this_att$dry_run) { + this_exists <- !is.na(myfiles$existing_checksum[idx]) + download_this <- !this_exists + if (this_att$clobber < 1) { + ## don't clobber existing + } else if (this_att$clobber == 1) { + ## replace existing if server hash does not match that of local copy + if (this_exists) { + download_this <- !isTRUE(myfiles$ETag[idx] == myfiles$checksum[idx]) + } + } else { + download_this <- TRUE + } + if (download_this) { + if (verbose) cat("Downloading:", this_url, "... \n") + if (!dir.exists(dirname(this_fullfile))) dir.create(dirname(this_fullfile), recursive = TRUE) + myfun <- warning ## if (stop_on_download_error) stop else warning + req <- httr::with_config(my_curl_config, httr::GET(this_url, httr::write_disk(path = this_fullfile, overwrite = TRUE))) + if (httr::http_error(req)) { + myfun("Error downloading ", this_url, ": ", httr::http_status(req)$message) + } else { + myfiles$was_downloaded[idx] <- TRUE + } + } else { + if (this_exists) { + if (verbose) cat("not downloading ", myfiles$filename[idx], ", local copy exists with identical checksum\n", sep = "") + } + } + } + } + if (this_att$dry_run && verbose) { + cat(" dry_run is TRUE, bb_handler_oceandata is not downloading the following files:\n", paste(myfiles$url, collapse="\n "), "\n") + } + fls <- myfiles[, c("url", "local_filename", "was_downloaded")] + names(fls)[2] <- "file" + tibble(ok = ok, files = list(fls), message = "") +} diff --git a/R/utils.R b/R/utils.R index 859f556..46960a7 100644 --- a/R/utils.R +++ b/R/utils.R @@ -3,14 +3,11 @@ ## not exported for user ## file checksums -file_hash <- function(filename,hash="sha1") { - assert_that(is.string(hash)) - hash <- match.arg(tolower(hash),c("md5","sha1")) +file_hash <- function(filename, hash = "sha1") { switch(hash, - md5=as.character(openssl::md5(file(filename))), - sha1=as.character(openssl::sha1(file(filename))), - stop("unexpected hash type",hash) - ) + md5 = as.character(openssl::md5(file(filename))), + sha1 = as.character(openssl::sha1(file(filename))), + stop("unexpected hash type", hash)) } ## NA or empty string diff --git a/man/bb_handler_copernicus.Rd b/man/bb_handler_copernicus.Rd new file mode 100644 index 0000000..d0c4c71 --- /dev/null +++ b/man/bb_handler_copernicus.Rd @@ -0,0 +1,25 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/copernicus_handler.R +\name{bb_handler_copernicus} +\alias{bb_handler_copernicus} +\title{Handler for Copernicus Marine datasets} +\usage{ +bb_handler_copernicus(product, ...) +} +\arguments{ +\item{product}{string: the desired Copernicus marine product. See \code{\link[CopernicusMarine]{cms_products_list}}} + +\item{...}{: parameters passed to \code{\link{bb_rget}}} +} +\value{ +TRUE on success +} +\description{ +This is a handler function to be used with data sets from Copernicus Marine. This function is not intended to be called directly, but rather is specified as a \code{method} option in \code{\link{bb_source}}. +} +\details{ +Note that users will need a Copernicus login. +} +\references{ +https://help.marine.copernicus.eu/en/collections/4060068-copernicus-marine-toolbox +}