From fd9c1ddd33eba8aeba97d3041cbe5b48ba40241c Mon Sep 17 00:00:00 2001 From: lakikowolfe Date: Thu, 25 Apr 2024 15:12:10 -0700 Subject: [PATCH] fix scheduled job update fxns --- R/manifest.R | 49 +++++++++++++++++++------------------------------ 1 file changed, 19 insertions(+), 30 deletions(-) diff --git a/R/manifest.R b/R/manifest.R index ab45174..9073cea 100644 --- a/R/manifest.R +++ b/R/manifest.R @@ -206,21 +206,22 @@ fill_dataflow_manifest <- function(dataflow_manifest_chunk, access_token = access_token ) }) - } - # Convert the named vector into a dataframe - study_status_df <- data.frame( - contributor_id = names(study_status), - study_status = study_status - ) + # Convert the named vector into a dataframe + study_status_df <- data.frame( + contributor_id = names(study_status), + study_status = study_status + ) - # merge study status into dataflow manifest chunk - dataflow_manifest_chunk <- merge( - x = dataflow_manifest_chunk, - y = study_status_df, - by = "contributor_id", - all.x = TRUE + # merge study status into dataflow manifest chunk + dataflow_manifest_chunk <- merge( + x = dataflow_manifest_chunk, + y = study_status_df, + by = "contributor_id", + all.x = TRUE ) + } + # find attributes that are not present in provided manifest chunk missing_attributes_df <- attributes_df[!attributes_df$Attribute %in% names(dataflow_manifest_chunk), ] @@ -246,6 +247,10 @@ fill_dataflow_manifest <- function(dataflow_manifest_chunk, # FIXME: is this what we want to happen if data flow schema changes? dataflow_manifest <- dplyr::bind_cols(dataflow_manifest_chunk, missing_attributes_filled) + # REMOVE contributor_id column + contributor_id_idx <- grep("contributor_id", names(dataflow_manifest)) + dataflow_manifest <- dataflow_manifest[, -contributor_id_idx] + # return filled columns with original manifest chunk return(dataflow_manifest) } @@ -324,22 +329,6 @@ update_data_flow_manifest <- function(asset_view, } ) - # synapse_manifests <- tryCatch( - # { - # get_all_manifests( - # asset_view = asset_view, - # na_replace = na_replace, - # access_token = access_token, - # base_url = base_url, - # verbose = FALSE - # ) - # }, - # error = function(e) { - # message("get_all_manifests failed") - # message(e) - # } - # ) - # check synapse for new datasets dataflow_manifest_updated <- update_manifest_add_datasets( dataflow_manifest = dataflow_manifest, @@ -463,7 +452,7 @@ update_manifest_add_datasets <- function(dataflow_manifest, } ) - new_datasets$num_items <- num_items + new_datasets$num_items <- as.integer(num_items) # fill data flow manifest rows for missing datasets new_datasets <- fill_dataflow_manifest( @@ -537,7 +526,7 @@ update_manifest_column <- function(dataflow_manifest, idx <- dataflow_manifest[, update_column] != get_all_manifests_out[, update_column] # if any items have changed update dataset type column - if (any(idx)) { + if (any(isTRUE(idx))) { n_changed <- sum(idx) print(paste0("Making ", n_changed, " update(s) to ", update_column, " column")) dataflow_manifest[idx, update_column] <- get_all_manifests_out[idx, update_column]