Skip to content

Commit

Permalink
modify oceandata_url_mapper to work on base filenames as well as full…
Browse files Browse the repository at this point in the history
… URLs, add parm to oceandata data_source_dir output v0.16.0
  • Loading branch information
raymondben committed Jul 30, 2024
1 parent f51faa3 commit a16124b
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 21 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: bowerbird
Type: Package
Title: Keep a Collection of Sparkly Data Resources
Version: 0.15.4
Version: 0.16.0
Authors@R: c(person("Ben", "Raymond", email = "ben.raymond@aad.gov.au", role = c("aut", "cre")),
person("Michael", "Sumner", role = "aut"),
person("Miles", "McBain", email = "miles.mcbain@gmail.com", role = c("rev", "ctb")),
Expand Down
39 changes: 24 additions & 15 deletions R/oceandata.R
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ bb_handler_oceandata_inner <- function(config, verbose = FALSE, local_dir_only =
}
out <- file.path(temp$hostname, out[1], out[2])
} else {
## note that we can't use url_mapper here, because the search string in the source definition is unlikely to conform to the expected full pattern
## highest-level dir
out <- "oceandata.sci.gsfc.nasa.gov"
## refine by platform. Find platform, either full or one-letter abbrev
Expand All @@ -113,10 +114,17 @@ bb_handler_oceandata_inner <- function(config, verbose = FALSE, local_dir_only =
## spatial res
if (grepl("4km", search)) out <- file.path(out, "4km") else if (grepl("9km", search)) out <- file.path(out, "9km")
## next level down is parameter name
ptbl <- oceandata_parameters()
pn <- stringr::str_detect(search, ptbl$pattern)
if (sum(tp, na.rm = TRUE) > 0) {
dest_p <- unique(ptbl$parameter[which(pn)])
## we can match on multiple parameters so long as they all map to the same parameter directory name
if (length(dest_p) == 1) out <- file.path(out, dest_p)
}
}
return(file.path(this_att$local_file_root, out))
}
if (is.null(thisds$user) || is.null(thisds$password) || na_or_empty(thisds$user) || na_or_empty(thisds$password)) stop(sprintf("Oceandata sources now require an Earthdata login: provide your user and password in the source configuration"))
if (is.null(thisds$user) || is.null(thisds$password) || na_or_empty(thisds$user) || na_or_empty(thisds$password)) stop("Oceandata sources require an Earthdata login: provide your user and password in the source configuration")
tries <- 0
## don't show progress for the file index
my_curl_config <- build_curl_config(debug = FALSE, show_progress = FALSE, user = thisds$user, password = thisds$password)
Expand Down Expand Up @@ -423,45 +431,46 @@ oceandata_find_platform <- function(x) {
# @param sep string: the path separator to use
# @return Either the directory string corresponding to the URL code, if \code{abbrev} supplied, or a data.frame of all URL regexps and corresponding directory name strings if \code{urlparm} is missing
# @export
oceandata_url_mapper <- function(this_url,path_only=FALSE,sep=.Platform$file.sep) {
## take getfile URL and return (relative) path to put the file into
## this_url should look like: https://oceandata.sci.gsfc.nasa.gov/ob/getfile/A2002359.L3m_DAY_CHL_chlor_a_9km.bz2
oceandata_url_mapper <- function(this_url, path_only = FALSE, sep = .Platform$file.sep) {
## take getfile URL or base filename and return (relative) path to put the file into
## this_url should look like e.g. (old format) https://oceandata.sci.gsfc.nasa.gov/ob/getfile/A2002359.L3m_DAY_CHL_chlor_a_9km.bz2
## or (newer format) https://oceandata.sci.gsfc.nasa.gov/ob/getfile/AQUA_MODIS.20230109.L3b.DAY.RRS.nc
## Mapped files (L3m) should become oceandata.sci.gsfc.nasa.gov/platform/Mapped/timeperiod/spatial/parm/[yyyy/]basename
## [yyyy] only for 8Day,Daily,Rolling_32_Day
## Binned files (L3b) should become oceandata.sci.gsfc.nasa.gov/platform/L3BIN/yyyy/ddd/basename
assert_that(is.string(this_url))
assert_that(is.flag(path_only),!is.na(path_only))
assert_that(is.flag(path_only), !is.na(path_only))
assert_that(is.string(sep))
if (grepl("\\.L3m[_\\.]",this_url)) {
if (grepl("\\.L3m[_\\.]", this_url)) {
## mapped file
url_parts <- str_match(this_url,"/([ASTCV]|AQUA_MODIS|SEASTAR_SEAWIFS_GAC|TERRA_MODIS|NIMBUS7_CZCS|SNPP_VIIRS|JPSS1_VIIRS)\\.?([[:digit:]_]+)\\.(L3m)[_\\.]([[:upper:][:digit:]]+)[_\\.](.*?)[_\\.](9|4)(km)?\\..*?(bz2|nc)")
url_parts <- str_match(basename(this_url), "^([ASTCV]|AQUA_MODIS|SEASTAR_SEAWIFS_GAC|TERRA_MODIS|NIMBUS7_CZCS|SNPP_VIIRS|JPSS1_VIIRS)\\.?([[:digit:]_]+)\\.(L3m)[_\\.]([[:upper:][:digit:]]+)[_\\.](.*?)[_\\.](9|4)(km)?\\..*?(bz2|nc)")
## e.g. [1,] "https://oceandata.sci.gsfc.nasa.gov/ob/getfile/A2002359.L3m_DAY_CHL_chlor_a_9km"
## [,2] [,3] [,4] [,5] [,6] [,7]
## "A" "2002359" "L3m" "DAY" "CHL_chlor_a" "9"
url_parts <- as.data.frame(url_parts, stringsAsFactors = FALSE)
colnames(url_parts) <- c("full_url", "platform", "date", "type", "timeperiod", "parm", "spatial", "spatial_unit", "ext")
## map back to old sensor abbreviations, at least temporarily
url_parts$platform <- oceandata_platform_to_abbrev(url_parts$platform)
} else if (grepl("\\.L3b[_\\.]",this_url)) {
} else if (grepl("\\.L3b[_\\.]", this_url)) {

url_parts <- str_match(this_url,"/([ASTCV]|AQUA_MODIS|SEASTAR_SEAWIFS_GAC|TERRA_MODIS|NIMBUS7_CZCS|SNPP_VIIRS|JPSS1_VIIRS)\\.?([[:digit:]]+)\\.(L3b)[_\\.]([[:upper:][:digit:]]+)[_\\.](.*?)\\.(bz2|nc)")
url_parts <- str_match(basename(this_url), "^([ASTCV]|AQUA_MODIS|SEASTAR_SEAWIFS_GAC|TERRA_MODIS|NIMBUS7_CZCS|SNPP_VIIRS|JPSS1_VIIRS)\\.?([[:digit:]]+)\\.(L3b)[_\\.]([[:upper:][:digit:]]+)[_\\.](.*?)\\.(bz2|nc)")
## https://oceandata.sci.gsfc.nasa.gov/ob/getfile/A20090322009059.L3b_MO_KD490.main.bz2

## e.g. [1,] "https://oceandata.sci.gsfc.nasa.gov/ob/getfile/A20090322009059.L3b_MO_KD490.main.bz2" "A" "20090322009059" "L3b" "MO" "KD490"
## https://oceandata.sci.gsfc.nasa.gov/ob/getfile/A2015016.L3b_DAY_RRS.nc
url_parts <- as.data.frame(url_parts,stringsAsFactors=FALSE)
colnames(url_parts) <- c("full_url","platform","date","type","timeperiod","parm")
url_parts <- as.data.frame(url_parts, stringsAsFactors = FALSE)
colnames(url_parts) <- c("full_url", "platform", "date", "type", "timeperiod", "parm")
url_parts$platform <- oceandata_platform_to_abbrev(url_parts$platform)
} else if (grepl("\\.L2", this_url)) {
# "https://oceandata.sci.gsfc.nasa.gov/ob/getfile/A2017002003000.L2_LAC_OC.nc"
url_parts <- str_match(this_url,"/([ASTCV]|AQUA_MODIS|SEASTAR_SEAWIFS_GAC|TERRA_MODIS|NIMBUS7_CZCS|SNPP_VIIRS|JPSS1_VIIRS)\\.?([[:digit:]]+)\\.(L2)[_\\.]([[:upper:][:digit:]]+)[_\\.](.*?)\\.(bz2|nc)")
url_parts <- as.data.frame(url_parts,stringsAsFactors=FALSE)
colnames(url_parts) <- c("full_url","platform","date","type","coverage","parm", "extension")
url_parts <- str_match(basename(this_url), "^([ASTCV]|AQUA_MODIS|SEASTAR_SEAWIFS_GAC|TERRA_MODIS|NIMBUS7_CZCS|SNPP_VIIRS|JPSS1_VIIRS)\\.?([[:digit:]]+)\\.(L2)[_\\.]([[:upper:][:digit:]]+)[_\\.](.*?)\\.(bz2|nc)")
url_parts <- as.data.frame(url_parts, stringsAsFactors = FALSE)
colnames(url_parts) <- c("full_url", "platform", "date", "type", "coverage", "parm", "extension")
url_parts$platform <- oceandata_platform_to_abbrev(url_parts$platform)
} else {
stop("not a L2 or L3 binned or L3 mapped file")
}
this_year <- substr(url_parts$date,1,4)
this_year <- substr(url_parts$date, 1, 4)
if (is.na(url_parts$type)) {
## no type provided? we can't proceed with the download, anyway
stop("cannot ascertain file type from oceancolor URL: ",this_url)
Expand Down
10 changes: 5 additions & 5 deletions tests/testthat/test_oceandata.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ test_that("bb_handler_oceandata works",{
data_group="Sea surface temperature")
temp_root <- tempdir()
ocf <- bb_add(bb_config(local_file_root=temp_root),ods)
expect_true(grepl("oceandata.sci.gsfc.nasa.gov/MODIST/Mapped/Monthly/9km$",bb_data_source_dir(ocf)))
expect_true(grepl("oceandata.sci.gsfc.nasa.gov/MODIST/Mapped/Monthly/9km/par$",bb_data_source_dir(ocf)))
bb_sync(ocf, confirm_downloads_larger_than = NULL)
fnm <- "oceandata.sci.gsfc.nasa.gov/MODIST/Mapped/Monthly/9km/par/TERRA_MODIS.20000301_20000331.L3m.MO.PAR.par.9km.nc" ## relative file name
expect_true(file.exists(file.path(temp_root,fnm)))
Expand Down Expand Up @@ -59,7 +59,7 @@ test_that("other oceandata handler tests", {
user = "", password = "", warn_empty_auth = FALSE)
temp_root <- tempdir()
ocf <- bb_add(bb_config(local_file_root = temp_root), ods)
expect_true(grepl("oceandata.sci.gsfc.nasa.gov/MODISA/L3BIN/Daily$", bb_data_source_dir(ocf)))
expect_true(grepl("oceandata.sci.gsfc.nasa.gov/MODISA/L3BIN/Daily/RRS$", bb_data_source_dir(ocf)))

ods <- bb_source(name="Oceandata VIIRS Level-3 binned daily RRS",
id="VIIRS_L3b_DAY_SNPP_RRS",
Expand All @@ -70,7 +70,7 @@ test_that("other oceandata handler tests", {
method=list("bb_handler_oceandata", search = "SNPP_VIIRS*L3b.DAY.RRS.nc", sensor = "viirs", dtype = "L3b"),
user = "", password = "", warn_empty_auth = FALSE)
ocf <- bb_add(bb_config(local_file_root = temp_root), ods)
expect_true(grepl("oceandata.sci.gsfc.nasa.gov/VIIRS/L3BIN/Daily$", bb_data_source_dir(ocf)))
expect_true(grepl("oceandata.sci.gsfc.nasa.gov/VIIRS/L3BIN/Daily/RRS$", bb_data_source_dir(ocf)))

ods <- bb_source(name="Oceandata MODIS Aqua Level-3 mapped daily 4km chl-a",
id="MODISA_L3m_DAY_CHL_chlor_a_4km",
Expand All @@ -81,7 +81,7 @@ test_that("other oceandata handler tests", {
method=list("bb_handler_oceandata",search="AQUA_MODIS*L3m.DAY.CHL.chlor_a.4km.nc", sensor = "aqua", dtype = "L3m"),
user = "", password = "", warn_empty_auth = FALSE)
ocf <- bb_add(bb_config(local_file_root = temp_root), ods)
expect_true(grepl("oceandata.sci.gsfc.nasa.gov/MODISA/Mapped/Daily/4km$", bb_data_source_dir(ocf)))
expect_true(grepl("oceandata.sci.gsfc.nasa.gov/MODISA/Mapped/Daily/4km/chlor$", bb_data_source_dir(ocf)))

ods <- bb_source(name="Oceandata SeaWiFS Level-3 binned daily RRS",
id="SeaWiFS_L3b_DAY_RRS",
Expand All @@ -92,7 +92,7 @@ test_that("other oceandata handler tests", {
method=list("bb_handler_oceandata",search="SEASTAR_SEAWIFS_GAC*L3b.DAY.RRS.nc", sensor = "seawifs", dtype = "L3b"),
user = "", password = "", warn_empty_auth = FALSE)
ocf <- bb_add(bb_config(local_file_root = temp_root), ods)
expect_true(grepl("oceandata.sci.gsfc.nasa.gov/SeaWiFS/L3BIN/Daily$", bb_data_source_dir(ocf)))
expect_true(grepl("oceandata.sci.gsfc.nasa.gov/SeaWiFS/L3BIN/Daily/RRS$", bb_data_source_dir(ocf)))
})

test_that("bb_handler_oceandata works when no files match",{
Expand Down

0 comments on commit a16124b

Please sign in to comment.