Skip to content

Commit

Permalink
fix for changes in json returned by zenodo
Browse files Browse the repository at this point in the history
  • Loading branch information
raymondben committed Feb 1, 2024
1 parent d802a17 commit c52356e
Showing 1 changed file with 21 additions and 4 deletions.
25 changes: 21 additions & 4 deletions R/zenodo.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,18 @@
#' @export
bb_zenodo_source <- function(id, use_latest = FALSE) {
jx <- jsonlite::fromJSON(paste0("https://zenodo.org/api/records/", id))
## $metadata$relations$version$is_last will be TRUE for latest version?
if (isTRUE(use_latest) && length(jx$links$latest) == 1 && nzchar(jx$links$latest) && !is.na(jx$links$latest)) {
latest_id <- stringr::str_match(jx$links$latest, "zenodo.org/api/records/([[:digit:]]+)$")
if (nrow(latest_id) == 1) return(bb_zenodo_source(id = latest_id[1, 2], use_latest = FALSE))
if (grepl("zenodo.org/api/records/([[:digit:]]+)$", jx$links$latest)) {
## old format? latest ID is given directly
latest_id <- stringr::str_match(jx$links$latest, "zenodo.org/api/records/([[:digit:]]+)$")
if (nrow(latest_id) == 1 && !is.na(latest_id[1, 2])) return(bb_zenodo_source(id = latest_id[1, 2], use_latest = FALSE))
} else if (grepl("zenodo.org/api/records/([[:digit:]]+)/", jx$links$latest)) {
## format has changed? (Feb 2024) - jx$links$latest gives the URL to the latest record but does not have the latest ID in it
jx <- jsonlite::fromJSON(jx$links$latest)
} else {
warning("could not find latest record, falling back to `use_latest = FALSE`")
}
}
ne_or <- function(z, or) tryCatch(if (!is.null(z) && nzchar(z)) z else or, error = function(e) or)
## collection size
Expand All @@ -36,14 +45,22 @@ bb_zenodo_source <- function(id, use_latest = FALSE) {
if (any(grepl("\\.zip$", jx$files$links$download, ignore.case = TRUE))) postproc <- c(postproc, list("unzip"))
if (any(grepl("\\.gz$", jx$files$links$download, ignore.case = TRUE))) postproc <- c(postproc, list("gunzip"))
## maybe other post-processors
## list all urls. Does this cover datasets with multiple buckets? (Are there such things?)
surls <- if ("download" %in% names(jx$files$links)) {
jx$files$links$download
} else if ("self" %in% names(jx$files$links)) {
jx$files$links$self
} else {
stop("could not find download URLs for zenodo source")
}
bb_source(name = ne_or(jx$title, ne_or(jx$metadata$title, "Dataset title")),
id = ne_or(doi, id),
description = ne_or(jx$metadata$description, "Dataset description"),
##keywords = ne_or(jx$metadata$keywords, NA_character_),
doc_url = doc_url,
citation = paste0("See ", doc_url, " for the correct citation"), ## seems odd that this isn't part of the record
license = ne_or(jx$metadata$license, paste0("See ", doc_url, " for license information")),
source_url = jx$files$links$download, ## list all urls. Does this cover datasets with multiple buckets? (Are there such things?)
license = ne_or(unlist(unname(jx$metadata$license)), paste0("See ", doc_url, " for license information")),
source_url = surls,
method = list("bb_handler_rget", level = 1L, accept_download = ".*"), ## we know that we want to download everything here, so just accept everything for download
comment = "Source definition created by bb_zenodo_source",
postprocess = postproc,
Expand Down

0 comments on commit c52356e

Please sign in to comment.