From 8f73a5ae738c6f66b93b0261642a3bc917195f23 Mon Sep 17 00:00:00 2001 From: Dave Slager Date: Thu, 25 Apr 2024 22:20:27 -0700 Subject: [PATCH] spell check and update WORDLIST --- DESCRIPTION | 4 +- NEWS.md | 12 ++-- R/DataPackageR-package.R | 8 +-- R/digests.R | 2 +- README.Rmd | 12 ++-- README.md | 24 ++++---- inst/WORDLIST | 74 ++++++++++++++---------- man/DataPackageR-package.Rd | 4 +- man/DataPackageR_options.Rd | 8 +-- vignettes/Using_DataPackageR.Rmd | 24 ++++---- vignettes/YAML_Configuration_Details.Rmd | 2 +- 11 files changed, 95 insertions(+), 79 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index d9daf16..be780e5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,10 +27,10 @@ Authors@R: email = "jmtaylor@fredhutch.org"), person(given = "Kara", family = "Woo", role = "rev", - comment = "Kara reviewed the package for ropensci, see "), + comment = "Kara reviewed the package for rOpenSci, see "), person(given = "William", family = "Landau", role = "rev", - comment = "William reviewed the package for ropensci, see ")) + comment = "William reviewed the package for rOpenSci, see ")) Version: 0.15.9.9000 Description: A framework to help construct R data packages in a reproducible manner. Potentially time consuming processing of diff --git a/NEWS.md b/NEWS.md index b5d3ea5..f4f5f26 100644 --- a/NEWS.md +++ b/NEWS.md @@ -61,13 +61,13 @@ * Data objects are incrementally stored during the build process, into the render_root directory specified in the datapackager.yml config file. # DataPackageR 0.15.3 -* conditional tests when pandoc is missing (ropensci/DataPackager/issues/46) -* add use_data_object and use_processing_script (ropensci/DataPackager/issues/44) -* allow datapackage_skeleton to be called without files or data objects for interactive construction. 
(ropensci/DataPackager/issues/44) +* conditional tests when pandoc is missing (ropensci/DataPackageR/issues/46) +* add use_data_object and use_processing_script (ropensci/DataPackageR/issues/44) +* allow datapackage_skeleton to be called without files or data objects for interactive construction. (ropensci/DataPackageR/issues/44) # DataPackageR 0.15.2 -* Add pandoc to SystemRequirements (ropensci/DataPackager/issues/46) -* Add use_raw_dataset() method (and tests) to add data sets to inst/extdata. interactively. (ropensci/DataPackager/issues/44) +* Add pandoc to SystemRequirements (ropensci/DataPackageR/issues/46) +* Add use_raw_dataset() method (and tests) to add data sets to inst/extdata interactively. (ropensci/DataPackageR/issues/44) # DataPackageR 0.15.1.9000 * Development version @@ -81,7 +81,7 @@ # DataPackageR 0.14.9 -- Moving towards ropensci compliance +- Moving towards rOpenSci compliance - NEWS.md updated with description of changes to data sets when version is bumped (or new package is created). - Output of "next steps" for user when package is built - New `document()` function to rebuild docs from `documentation.R` in `data-raw` without rebuilding the whole package. diff --git a/R/DataPackageR-package.R b/R/DataPackageR-package.R index 1217c0d..c0fc649 100644 --- a/R/DataPackageR-package.R +++ b/R/DataPackageR-package.R @@ -108,12 +108,12 @@ NULL #' \code{package_build()}. See the example above and the #' \href{https://ropensci.org/blog/2018/09/18/datapackager/}{rOpenSci blog #' post} for more details on how to set this to FALSE, which will never prompt -#' user for a NEWS update. FALSE is also the setting used for DataPackageR's -#' internal tests. +#' user for a NEWS update. FALSE is also the setting used for DataPackageR +#' internal package tests. #' #' - \code{DataPackageR_verbose}: Default upon package load is TRUE. FALSE suppresses -#' all console output and is currently only used for DataPackageR's automated -#' unit tests. 
+#' all console output and is currently only used for automated +#' unit tests of the DataPackageR package. #' #' - \code{DataPackageR_packagebuilding}: Default upon package load is FALSE. This #' option is used internally for package operations and changing it is not diff --git a/R/digests.R b/R/digests.R index 1b46608..8b59a3a 100644 --- a/R/digests.R +++ b/R/digests.R @@ -31,7 +31,7 @@ .compare_digests <- function(old_digest, new_digest) { # Returns FALSE when any exisiting data has is changed, new data is added, or data is removed, else return TRUE. - # Use .mutlilog_warn when there is a change and multilog_debug when new data is added. + # Use .multilog_warn when there is a change and multilog_debug when new data is added. existed <- names(new_digest)[names(new_digest) %in% names(old_digest)] added <- setdiff(names(new_digest), existed) diff --git a/README.Rmd b/README.Rmd index 385b9c7..18132bb 100644 --- a/README.Rmd +++ b/README.Rmd @@ -58,7 +58,7 @@ You have diverse raw data sets that you need to preprocess and tidy in order to: - **Collaboration.** - A data set packaged in R is easy to distribute and share amongst collaborators, and is easy to install and use. + A data set packaged in R is easy to distribute and share among collaborators, and is easy to install and use. All the hard work you've put into documenting and standardizing the tidy data set comes right along with the data package. - **Documentation.** @@ -75,7 +75,7 @@ You have diverse raw data sets that you need to preprocess and tidy in order to: - **Package size limits.** - R packages have a 10MB size limit, at least on [CRAN](https://cran.r-project.org/web/packages/policies.html). BioConductor [ExperimentHub](http://contributions.bioconductor.org/data.html#data) may be able to support larger data packages. + R packages have a 10MB size limit, at least on [CRAN](https://cran.r-project.org/web/packages/policies.html). 
Bioconductor [ExperimentHub](http://contributions.bioconductor.org/data.html#data) may be able to support larger data packages. Sharing large volumes of raw data in an R package format is still not ideal, and there are public biological data repositories better suited for raw data: e.g., [GEO](https://www.ncbi.nlm.nih.gov/geo/), [SRA](https://www.ncbi.nlm.nih.gov/sra), [ImmPort](https://www.immport.org/), [ImmuneSpace](https://immunespace.org/), [FlowRepository](http://flowrepository.org/). @@ -131,7 +131,7 @@ There are a number of tools out there that address similar and complementary pro - **rrrpkg** [github repo](https://github.com/ropensci/rrrpkg) - A document from ropensci describing using an R package as a research compendium. Based on ideas originally introduced by Robert Gentleman and Duncan Temple Lang (Gentleman and Lang (2004)) + A document from rOpenSci describing using an R package as a research compendium. Based on ideas originally introduced by Robert Gentleman and Duncan Temple Lang (Gentleman and Lang (2004)) - **template** [github repo](https://github.com/ropensci/rrrpkg) @@ -260,10 +260,10 @@ DataPackageR::project_data_path() ## Preprint and publication. -The publication describing the package, (Finak *et* *al.,* 2018), is now available at [Gates Open Research](https://gatesopenresearch.org/articles/2-31/v2) . +The publication describing the package, (Finak *et al.*, 2018), is now available at [Gates Open Research](https://gatesopenresearch.org/articles/2-31/v2) . -The preprint is on [biorxiv](https://doi.org/10.1101/342907). +The preprint is on [bioRxiv](https://doi.org/10.1101/342907). ## Code of conduct @@ -276,7 +276,7 @@ Please note that this project is released with a [Contributor Code of Conduct](h 1. Gentleman, Robert, and Duncan Temple Lang. 2004. “Statistical Analyses and Reproducible Research.” Bioconductor Project Working Papers, Bioconductor project working papers,. bepress. 2. Finak G, Mayer B, Fulp W, et al. 
DataPackageR: Reproducible data preprocessing, standardization and sharing using R/Bioconductor for collaborative data analysis. Gates Open Res 2018, 2:31 -(doi: 10.12688/gatesopenres.12832.1) +(DOI: 10.12688/gatesopenres.12832.1) diff --git a/README.md b/README.md index a57c7cc..34c327d 100644 --- a/README.md +++ b/README.md @@ -58,7 +58,7 @@ purpose is to contain, access, and / or document data sets. - **Collaboration.** - A data set packaged in R is easy to distribute and share amongst + A data set packaged in R is easy to distribute and share among collaborators, and is easy to install and use. All the hard work you’ve put into documenting and standardizing the tidy data set comes right along with the data package. @@ -84,7 +84,7 @@ purpose is to contain, access, and / or document data sets. R packages have a 10MB size limit, at least on [CRAN](https://cran.r-project.org/web/packages/policies.html). - BioConductor + Bioconductor [ExperimentHub](http://contributions.bioconductor.org/data.html#data) may be able to support larger data packages. @@ -169,7 +169,7 @@ complementary problems: - **rrrpkg** [github repo](https://github.com/ropensci/rrrpkg) - A document from ropensci describing using an R package as a research + A document from rOpenSci describing using an R package as a research compendium. 
Based on ideas originally introduced by Robert Gentleman and Duncan Temple Lang (Gentleman and Lang (2004)) @@ -234,8 +234,8 @@ datapackage_skeleton( code_files = processing_code, r_object_names = "cars_over_20", path = tempdir()) -#> ✔ Creating 'C:/Users/dslager/AppData/Local/Temp/RtmpuqBumR/mtcars20/' -#> ✔ Setting active project to 'C:/Users/dslager/AppData/Local/Temp/RtmpuqBumR/mtcars20' +#> ✔ Creating 'C:/Users/dslager/AppData/Local/Temp/RtmpaIslsQ/mtcars20/' +#> ✔ Setting active project to 'C:/Users/dslager/AppData/Local/Temp/RtmpaIslsQ/mtcars20' #> ✔ Creating 'R/' #> ✔ Writing 'DESCRIPTION' #> Package: mtcars20 @@ -251,7 +251,7 @@ datapackage_skeleton( #> RoxygenNote: 7.3.1 #> ✔ Writing 'NAMESPACE' #> ✔ Setting active project to '' -#> ✔ Setting active project to 'C:/Users/dslager/AppData/Local/Temp/RtmpuqBumR/mtcars20' +#> ✔ Setting active project to 'C:/Users/dslager/AppData/Local/Temp/RtmpaIslsQ/mtcars20' #> ✔ Added DataVersion string to 'DESCRIPTION' #> ✔ Creating 'data-raw/' #> ✔ Creating 'data/' @@ -279,13 +279,13 @@ package_build(packageName = file.path(tempdir(),"mtcars20")) #> Writing 'mtcars20.Rd' #> Writing 'cars_over_20.Rd' #> ── R CMD build ───────────────────────────────────────────────────────────────── -#> checking for file 'C:\Users\dslager\AppData\Local\Temp\RtmpuqBumR\mtcars20/DESCRIPTION' ... ✔ checking for file 'C:\Users\dslager\AppData\Local\Temp\RtmpuqBumR\mtcars20/DESCRIPTION' (341ms) +#> checking for file 'C:\Users\dslager\AppData\Local\Temp\RtmpaIslsQ\mtcars20/DESCRIPTION' ... ✔ checking for file 'C:\Users\dslager\AppData\Local\Temp\RtmpaIslsQ\mtcars20/DESCRIPTION' (432ms) #> ─ preparing 'mtcars20': #> checking DESCRIPTION meta-information ... checking DESCRIPTION meta-information ... 
✔ checking DESCRIPTION meta-information #> ─ checking for LF line-endings in source and make files and shell scripts #> ─ checking for empty or unneeded directories #> ─ looking to see if a 'data/datalist' file should be added -#> ─ building 'mtcars20_1.0.tar.gz' +#> ─ building 'mtcars20_1.0.tar.gz' #> #> Next Steps #> 1. Update your package documentation. @@ -298,7 +298,7 @@ package_build(packageName = file.path(tempdir(),"mtcars20")) #> - Set up a github repository for your pacakge. #> - Add the github repository as a remote of your local package repository. #> - git push your local repository to gitub. -#> [1] "C:/Users/dslager/AppData/Local/Temp/RtmpuqBumR/mtcars20_1.0.tar.gz" +#> [1] "C:/Users/dslager/AppData/Local/Temp/RtmpaIslsQ/mtcars20_1.0.tar.gz" # Update the autogenerated roxygen documentation in data-raw/documentation.R. # edit(file.path(tempdir(),"mtcars20","R","mtcars20.R")) @@ -377,11 +377,11 @@ DataPackageR::project_data_path() ## Preprint and publication. -The publication describing the package, (Finak *et* *al.,* +The publication describing the package, (Finak *et al.*, 2018), is now available at [Gates Open Research](https://gatesopenresearch.org/articles/2-31/v2) . -The preprint is on [biorxiv](https://doi.org/10.1101/342907). +The preprint is on [bioRxiv](https://doi.org/10.1101/342907). ## Code of conduct @@ -397,7 +397,7 @@ By participating in this project you agree to abide by its terms. 2. Finak G, Mayer B, Fulp W, et al. DataPackageR: Reproducible data preprocessing, standardization and sharing using R/Bioconductor for - collaborative data analysis. Gates Open Res 2018, 2:31 (doi: + collaborative data analysis. 
Gates Open Res 2018, 2:31 (DOI: 10.12688/gatesopenres.12832.1) [![ropensci_footer](https://ropensci.org/public_images/ropensci_footer.png)](https://ropensci.org) diff --git a/inst/WORDLIST b/inst/WORDLIST index 4d428b3..a212988 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -1,46 +1,62 @@ -amongst -autogenerated CMD +Codecov +Coopting +DATADIGEST +DOI +DataONE +DataVersion +ExperimentHub +FlowRepository +Hadley +ImmPort +ImmuneSpace +ORCID +Pre +Preprint +README +RMarkdown +Rbuildignore +Reproducibility +Rmd +SRA +SystemRequirements +Wickham's +YAML +al +bepress +bioRxiv config csv -DATADIGEST +datapack datapackage datapackager -DataVersion -Dont +datastorr +et extdata -filesystem -Finak -fredhutch -gfinak +fst +gatesopenres github -graphicx -Hadley +gitignore +https incrementing -inputenc -knitr -maintinaing +loc md mtcars -mutli mydata -objecs -pacakge -Pre -procesing -rda -README +onboarding +pandoc +pre +preprint +preprocess +rOpenSci +repo reproducibility reproducibly rmarkdown -Rmd -toc -usepackage +ropensci +roxygen +rrrpkg useR -utf -VignetteEngine -VignetteIndexEntry -Wickham's +usethis yaml -YAML yml diff --git a/man/DataPackageR-package.Rd b/man/DataPackageR-package.Rd index 74b8bc2..d1abf63 100644 --- a/man/DataPackageR-package.Rd +++ b/man/DataPackageR-package.Rd @@ -112,8 +112,8 @@ Other contributors: \item Jimmy Fulp \email{williamjfulp@gmail.com} [contributor] \item Marie Vendettuoli (\href{https://orcid.org/0000-0001-9321-1410}{ORCID}) [contributor] \item Jason Taylor \email{jmtaylor@fredhutch.org} [contributor] - \item Kara Woo (Kara reviewed the package for ropensci, see ) [reviewer] - \item William Landau (William reviewed the package for ropensci, see ) [reviewer] + \item Kara Woo (Kara reviewed the package for rOpenSci, see ) [reviewer] + \item William Landau (William reviewed the package for rOpenSci, see ) [reviewer] } } diff --git a/man/DataPackageR_options.Rd b/man/DataPackageR_options.Rd index 125fba7..6c00551 100644 --- 
a/man/DataPackageR_options.Rd +++ b/man/DataPackageR_options.Rd @@ -24,12 +24,12 @@ User-configurable options consulted by DataPackageR, which \code{package_build()}. See the example above and the \href{https://ropensci.org/blog/2018/09/18/datapackager/}{rOpenSci blog post} for more details on how to set this to FALSE, which will never prompt - user for a NEWS update. FALSE is also the setting used for DataPackageR's - internal tests. + user for a NEWS update. FALSE is also the setting used for DataPackageR + internal package tests. - \code{DataPackageR_verbose}: Default upon package load is TRUE. FALSE suppresses - all console output and is currently only used for DataPackageR's automated - unit tests. + all console output and is currently only used for automated + unit tests of the DataPackageR package. - \code{DataPackageR_packagebuilding}: Default upon package load is FALSE. This option is used internally for package operations and changing it is not diff --git a/vignettes/Using_DataPackageR.Rmd b/vignettes/Using_DataPackageR.Rmd index 333d249..7c3bca6 100644 --- a/vignettes/Using_DataPackageR.Rmd +++ b/vignettes/Using_DataPackageR.Rmd @@ -22,7 +22,7 @@ knitr::opts_chunk$set( ## Purpose -This vignette demonstrates how to use DataPackageR to build a data package. DataPackageR aims to simplify data package construction. It provides mechanisms for reproducibly preprocessing and tidying raw data into into documented, versioned, and packaged analysis-ready data sets. Long-running or computationally intensive data processing can be decoupled from the usual `R CMD build` process while maintinaing [data lineage](https://en.wikipedia.org/wiki/Data_lineage). +This vignette demonstrates how to use DataPackageR to build a data package. DataPackageR aims to simplify data package construction. It provides mechanisms for reproducibly preprocessing and tidying raw data into documented, versioned, and packaged analysis-ready data sets. 
Long-running or computationally intensive data processing can be decoupled from the usual `R CMD build` process while maintaining [data lineage](https://en.wikipedia.org/wiki/Data_lineage). For demonstration purposes, in this vignette we will subset and package the `mtcars` data set. @@ -62,7 +62,7 @@ DataPackageR::datapackage_skeleton(name = "mtcars20", ### What's in the package skeleton structure? -The process above has created a DataPackageR source tree named "mtcars20" in a temporary directory. For a real use case, you would pick a path on your filesystem where you could then initialize a new github repository for the package. +The process above has created a DataPackageR source tree named "mtcars20" in a temporary directory. For a real use case, you would pick a path on your file system where you could then initialize a new GitHub repository for the package. The contents of `mtcars20` are: @@ -114,7 +114,7 @@ The `datapackage_skeleton()` API has the `raw_data_dir` argument, which will cop In this example we are reading the `mtcars` data set that is already in memory, rather than from the file system. -### An API to read raw data sets from within an R or Rmd procesing script. +### An API to read raw data sets from within an R or Rmd processing script. As stated in the README, in order for your processing scripts to be portable, you should not use absolute paths to files. DataPackageR provides an API to point to the data package root directory and the `inst/extdata` and `data` subdirectories. These are useful for constructing portable paths in your code to read files from these locations. @@ -198,13 +198,13 @@ df <- data.frame( as.Node(df) ``` -### Update the autogenerated documentation. +### Update the auto-generated documentation. -After the first build, the `R` directory contains `mtcars.R` that has autogenerated `roxygen2` markup documentation for the data package and for the `cars_over20` packaged data. 
+After the first build, the `R` directory contains `mtcars.R` that has auto-generated `roxygen2` markup documentation for the data package and for the `cars_over20` packaged data. The processed `Rd` files can be found in `man`. -The autogenerated documentation source is in the `documentation.R` file in `data-raw`. +The auto-generated documentation source is in the `documentation.R` file in `data-raw`. You should update this file to properly document your objects. Then rebuild the documentation: @@ -348,9 +348,9 @@ This is useful in situations where we have multiple data sets, and we want to re # Multi-script pipelines. -We may have situations where we have mutli-script pipelines. There are two ways to share data among scripts. +We may have situations where we have multi-script pipelines. There are two ways to share data among scripts. -1. filesystem artifacts +1. file system artifacts 2. data objects passed to subsequent scripts ## File system artifacts. @@ -367,7 +367,7 @@ For example, `script2.Rmd` will run after `script1.Rmd`. `script2.Rmd` needs to `dataset1 <- DataPackageR::datapackager_object_read("dataset1")`. -Passing of data objects amongst scripts can be turned off via: +Passing of data objects among scripts can be turned off via: `package_build(deps = FALSE)` @@ -381,13 +381,13 @@ You now have a data package source tree. - **Place your package under version control** 1. Call `git init` in the package source root to initialize a new git repository. - 2. [Create a new repository for your data package on github](https://docs.github.com/articles/create-a-repo/). - 3. Push your local package repository to `github`. [see step 7](https://docs.github.com/articles/adding-an-existing-project-to-github-using-the-command-line/) + 2. [Create a new repository for your data package on GitHub](https://docs.github.com/articles/create-a-repo/). + 3. Push your local package repository to `GitHub`. 
[see step 7](https://docs.github.com/articles/adding-an-existing-project-to-github-using-the-command-line/) This will let you version control your data processing code, and will provide a mechanism for sharing your package with others. -For more details on using git and github with R, there is an excellent guide provided by Jenny Bryan: [Happy Git and GitHub for the useR](https://happygitwithr.com/) and Hadley Wickham's [book on R packages]( https://r-pkgs.org/). +For more details on using git and GitHub with R, there is an excellent guide provided by Jenny Bryan: [Happy Git and GitHub for the useR](https://happygitwithr.com/) and Hadley Wickham's [book on R packages]( https://r-pkgs.org/). # Additional Details. diff --git a/vignettes/YAML_Configuration_Details.Rmd b/vignettes/YAML_Configuration_Details.Rmd index 99133e1..c372b71 100644 --- a/vignettes/YAML_Configuration_Details.Rmd +++ b/vignettes/YAML_Configuration_Details.Rmd @@ -49,7 +49,7 @@ Each file itself has just one property: A logical `yes`, `no` flag indicating whether the file should be rendered during the build, or whether it should be skipped. - This is useful for 'turning off' long running processing tasks if they have not changed. Disabling processing of a file will not overwrite existing documentation or data objecs created during previous builds. + This is useful for 'turning off' long running processing tasks if they have not changed. Disabling processing of a file will not overwrite existing documentation or data objects created during previous builds. - `objects:`