From a052e78a187bd2f72326f911156665eb2a7b798c Mon Sep 17 00:00:00 2001 From: Collin Schwantes Date: Mon, 9 Dec 2024 14:23:45 -0500 Subject: [PATCH] make schema and metadata df the same length --- NEWS.md | 3 +++ R/create_structural_metadata.R | 14 +++++++------- R/modify_frictionless_metadata.R | 16 +++++++++++++--- man/create_structural_metadata.Rd | 17 +++++++++-------- man/expand_frictionless_metadata.Rd | 6 ++++-- man/prune_datapackage.Rd | 2 +- vignettes/data_examples/my_data.csv | 22 +++++++++++----------- 7 files changed, 48 insertions(+), 32 deletions(-) diff --git a/NEWS.md b/NEWS.md index 70aa1dd..ab8e724 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,8 @@ # ohcleandat 0.3.12 +* `expand_frictionless_metadata` can add and remove fields from the metadata depending +on the structural metadata supplied. + # ohcleandat 0.3.11 * obfuscate gps can now handle NAs diff --git a/R/create_structural_metadata.R b/R/create_structural_metadata.R index a207b41..546ad92 100644 --- a/R/create_structural_metadata.R +++ b/R/create_structural_metadata.R @@ -15,13 +15,13 @@ #' #' The metadata table produced has the following elements #' -#' `name` = The name of the field. This is taken as is from `data`. -#' `description` = Description of that field. May be provided by controlled vocabulary -#' `units` = Units of measure for that field. May or may not apply -#' `term_uri` = Universal Resource Identifier for a term from a controlled vocabulary or schema -#' `comments` = Free text providing additional details about the field -#' `primary_key` = `TRUE` or `FALSE`, Uniquely identifies each record in the data -#' `foreign_key` = `TRUE` or `FALSE`, Allows for linkages between data sets. Uniquely identifies +#' - `name` = The name of the field. This is taken as is from `data`. +#' - `description` = Description of that field. May be provided by controlled vocabulary +#' - `units` = Units of measure for that field. 
May or may not apply +#' - `term_uri` = Uniform Resource Identifier for a term from a controlled vocabulary or schema +#' - `comments` = Free text providing additional details about the field +#' - `primary_key` = `TRUE` or `FALSE`, Uniquely identifies each record in the data +#' - `foreign_key` = `TRUE` or `FALSE`, Allows for linkages between data sets. Uniquely identifies #' records in a different data set #' #' diff --git a/R/modify_frictionless_metadata.R b/R/modify_frictionless_metadata.R index f49eb8d..d487861 100644 --- a/R/modify_frictionless_metadata.R +++ b/R/modify_frictionless_metadata.R @@ -1,7 +1,9 @@ #' Expand Frictionless Metadata with structural metadata #' -#' Loops over elements in the structural metadata and adds them to frictionless -#' metadata schema. Will overwrite existing values. +#' Loops over elements in the structural metadata and adds +#' them to the frictionless metadata schema. Will overwrite existing values and +#' remove any fields from the datapackage metadata not listed in the structural +#' metadata. #' #' @param structural_metadata Dataframe. 
Structural metadata from #' `create_structural_metadata` or `update_structural_metadata` @@ -61,6 +63,11 @@ expand_frictionless_metadata <- function(structural_metadata, ## build up schema based on structural metadata + ## drop fields that were removed from the structural metadata + if(nrow(structural_metadata) <= length(my_data_schema$fields)){ + my_data_schema$fields <- my_data_schema$fields[1:nrow(structural_metadata)] + } + # for each row, update the schema for(idx in 1:nrow(structural_metadata)){ # item to build out @@ -101,6 +108,7 @@ expand_frictionless_metadata <- function(structural_metadata, } + ## prune the properties of items in the schema, does not remove fields if(prune_datapackage){ my_data_schema <- prune_datapackage(my_data_schema,structural_metadata) } @@ -120,7 +128,7 @@ expand_frictionless_metadata <- function(structural_metadata, } -#' Prune data pacakge +#' Prune field properties in a data package #' #' method to remove properties from the metadata for a dataset in a datapackage #' @@ -152,3 +160,5 @@ prune_datapackage <- function(my_data_schema, structural_metadata){ return(my_data_schema_pruned) } + + diff --git a/man/create_structural_metadata.Rd b/man/create_structural_metadata.Rd index 9445cc4..d6d3ceb 100644 --- a/man/create_structural_metadata.Rd +++ b/man/create_structural_metadata.Rd @@ -31,16 +31,17 @@ generated then joined to pre-existing metadata via field names. } \details{ The metadata table produced has the following elements - -\code{name} = The name of the field. This is taken as is from \code{data}. -\code{description} = Description of that field. May be provided by controlled vocabulary -\code{units} = Units of measure for that field. 
May or may not apply -\code{term_uri} = Universal Resource Identifier for a term from a controlled vocabulary or schema -\code{comments} = Free text providing additional details about the field -\code{primary_key} = \code{TRUE} or \code{FALSE}, Uniquely identifies each record in the data -\code{foreign_key} = \code{TRUE} or \code{FALSE}, Allows for linkages between data sets. Uniquely identifies +\itemize{ +\item \code{name} = The name of the field. This is taken as is from \code{data}. +\item \code{description} = Description of that field. May be provided by controlled vocabulary +\item \code{units} = Units of measure for that field. May or may not apply +\item \code{term_uri} = Uniform Resource Identifier for a term from a controlled vocabulary or schema +\item \code{comments} = Free text providing additional details about the field +\item \code{primary_key} = \code{TRUE} or \code{FALSE}, Uniquely identifies each record in the data +\item \code{foreign_key} = \code{TRUE} or \code{FALSE}, Allows for linkages between data sets. Uniquely identifies records in a different data set } +} \examples{ \dontrun{ df <- data.frame(a = 1:10, b = letters[1:10]) diff --git a/man/expand_frictionless_metadata.Rd b/man/expand_frictionless_metadata.Rd index c6af2dd..5adf538 100644 --- a/man/expand_frictionless_metadata.Rd +++ b/man/expand_frictionless_metadata.Rd @@ -29,8 +29,10 @@ be removed?} Updates the datapackage, returns nothing } \description{ -Loops over elements in the structural metadata and adds them to frictionless -metadata schema. Will overwrite existing values. +Loops over elements in the structural metadata and adds +them to the frictionless metadata schema. Will overwrite existing values and +remove any fields from the datapackage metadata not listed in the structural +metadata. 
} \examples{ \dontrun{ diff --git a/man/prune_datapackage.Rd b/man/prune_datapackage.Rd index 17dab6e..acd14d8 100644 --- a/man/prune_datapackage.Rd +++ b/man/prune_datapackage.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/modify_frictionless_metadata.R \name{prune_datapackage} \alias{prune_datapackage} -\title{Prune data pacakge} +\title{Prune field properties in a data package} \usage{ prune_datapackage(my_data_schema, structural_metadata) } diff --git a/vignettes/data_examples/my_data.csv b/vignettes/data_examples/my_data.csv index 8846be9..b849f8c 100644 --- a/vignettes/data_examples/my_data.csv +++ b/vignettes/data_examples/my_data.csv @@ -1,11 +1,11 @@ -"date","measurement","measured_by","site_name","key" -2024-08-26,43,"Johana","c",1 -2024-08-27,9,"Johana","c",2 -2024-08-28,79,"Johana","c",3 -2024-08-29,17,"Collin","a",4 -2024-08-30,61,"Johana","e",5 -2024-08-31,30,"Collin","b",6 -2024-09-01,58,"Collin","a",7 -2024-09-02,27,"Johana","d",8 -2024-09-03,52,"Johana","d",9 -2024-09-04,82,"Collin","e",10 +date,measurement,measured_by,site_name,key +2024-08-26,43,Johana,c,1 +2024-08-27,9,Johana,c,2 +2024-08-28,79,Johana,c,3 +2024-08-29,17,Collin,a,4 +2024-08-30,61,Johana,e,5 +2024-08-31,30,Collin,b,6 +2024-09-01,58,Collin,a,7 +2024-09-02,27,Johana,d,8 +2024-09-03,52,Johana,d,9 +2024-09-04,82,Collin,e,10