Skip to content

Commit

Permalink
Merge pull request #53 from Sage-Bionetworks/fds-1170-scheduled-job-b…
Browse files Browse the repository at this point in the history
…ug-fixes

make updates to scheduled job functions
  • Loading branch information
lakikowolfe authored Apr 30, 2024
2 parents 3f6d6aa + fd9c1dd commit cae3afd
Showing 1 changed file with 19 additions and 30 deletions.
49 changes: 19 additions & 30 deletions R/manifest.R
Original file line number Diff line number Diff line change
Expand Up @@ -206,21 +206,22 @@ fill_dataflow_manifest <- function(dataflow_manifest_chunk,
access_token = access_token
)
})
}

# Convert the named vector into a dataframe
study_status_df <- data.frame(
contributor_id = names(study_status),
study_status = study_status
)
# Convert the named vector into a dataframe
study_status_df <- data.frame(
contributor_id = names(study_status),
study_status = study_status
)

# merge study status into dataflow manifest chunk
dataflow_manifest_chunk <- merge(
x = dataflow_manifest_chunk,
y = study_status_df,
by = "contributor_id",
all.x = TRUE
# merge study status into dataflow manifest chunk
dataflow_manifest_chunk <- merge(
x = dataflow_manifest_chunk,
y = study_status_df,
by = "contributor_id",
all.x = TRUE
)
}


# find attributes that are not present in provided manifest chunk
missing_attributes_df <- attributes_df[!attributes_df$Attribute %in% names(dataflow_manifest_chunk), ]
Expand All @@ -246,6 +247,10 @@ fill_dataflow_manifest <- function(dataflow_manifest_chunk,
# FIXME: is this what we want to happen if data flow schema changes?
dataflow_manifest <- dplyr::bind_cols(dataflow_manifest_chunk, missing_attributes_filled)

# REMOVE contributor_id column
contributor_id_idx <- grep("contributor_id", names(dataflow_manifest))
dataflow_manifest <- dataflow_manifest[, -contributor_id_idx]

# return filled columns with original manifest chunk
return(dataflow_manifest)
}
Expand Down Expand Up @@ -324,22 +329,6 @@ update_data_flow_manifest <- function(asset_view,
}
)

# synapse_manifests <- tryCatch(
# {
# get_all_manifests(
# asset_view = asset_view,
# na_replace = na_replace,
# access_token = access_token,
# base_url = base_url,
# verbose = FALSE
# )
# },
# error = function(e) {
# message("get_all_manifests failed")
# message(e)
# }
# )

# check synapse for new datasets
dataflow_manifest_updated <- update_manifest_add_datasets(
dataflow_manifest = dataflow_manifest,
Expand Down Expand Up @@ -463,7 +452,7 @@ update_manifest_add_datasets <- function(dataflow_manifest,
}
)

new_datasets$num_items <- num_items
new_datasets$num_items <- as.integer(num_items)

# fill data flow manifest rows for missing datasets
new_datasets <- fill_dataflow_manifest(
Expand Down Expand Up @@ -537,7 +526,7 @@ update_manifest_column <- function(dataflow_manifest,
idx <- dataflow_manifest[, update_column] != get_all_manifests_out[, update_column]

# if any items have changed update dataset type column
if (any(idx)) {
if (any(isTRUE(idx))) {
n_changed <- sum(idx)
print(paste0("Making ", n_changed, " update(s) to ", update_column, " column"))
dataflow_manifest[idx, update_column] <- get_all_manifests_out[idx, update_column]
Expand Down

0 comments on commit cae3afd

Please sign in to comment.