diff --git a/bibliography/book.bib b/bibliography/book.bib index bbb9212..eb74cad 100644 --- a/bibliography/book.bib +++ b/bibliography/book.bib @@ -44,7 +44,7 @@ @manual{r-tigris author = {Kyle Walker}, year = 2023, url = {https://CRAN.R-project.org/package=tigris}, - note = {R package version 2.0.3} + note = {R Package Version 2.0.3} } @article{r-tmap, @@ -63,7 +63,7 @@ @manual{r-tidycensus author = {Kyle Walker and Matt Herman}, year = 2023, url = {https://CRAN.R-project.org/package=tidycensus}, - note = {R package version 1.4.4} + note = {R Package Version 1.4.4} } @book{walker2023analyzing, @@ -119,7 +119,7 @@ @manual{r-tidycensus author = {Kyle Walker and Matt Herman}, year = 2023, url = {https://CRAN.R-project.org/package=tidycensus}, - note = {R package version 1.4.4} + note = {R Package Version 1.4.4} } @manual{r-terra, @@ -127,7 +127,7 @@ @manual{r-terra author = {Robert J. Hijmans}, year = 2024, url = {https://CRAN.R-project.org/package=terra}, - note = {R package version 1.7-71} + note = {R Package Version 1.7-71} } @book{r-ggplot2, @@ -144,7 +144,7 @@ @manual{r-ggpubr author = {Alboukadel Kassambara}, year = 2023, url = {https://CRAN.R-project.org/package=ggpubr}, - note = {R package version 0.6.0} + note = {R Package Version 0.6.0} } @manual{r-sf-article1, @@ -165,7 +165,7 @@ @manual{r-dplyr author = {Hadley Wickham and Romain François and Lionel Henry and Kirill Müller and Davis Vaughan}, year = 2023, url = {https://CRAN.R-project.org/package=dplyr}, - note = {R package version 1.1.4} + note = {R Package Version 1.1.4} } @manual{dplyr-introduction, @@ -176,7 +176,7 @@ @manual{dplyr-introduction } @article{r-tidyterra, - title = {Using the {tidyverse} with {terra} objects: the {tidyterra} package}, + title = {Using the {tidyverse} with {terra} Objects: The {tidyterra} Package}, author = {Diego Hernangómez}, year = 2023, journal = {Journal of Open Source Software}, @@ -212,7 +212,7 @@ @incollection{geocomputation-24 @book{lovelace_geocomputation_2019, 
title = {Geocomputation with {{R}}}, isbn = {1-138-30451-4}, - abstract = {Book on geographic data with R.}, + abstract = {Book on Geographic Data with {R}}, publisher = {{CRC Press}}, author = {Lovelace, Robin and Nowosad, Jakub and Muenchow, Jannes}, year = {2019}, diff --git a/bibliography/chicago-author-date-with-note.csl b/bibliography/chicago-author-date-with-note.csl index 87e83d1..4e40149 100644 --- a/bibliography/chicago-author-date-with-note.csl +++ b/bibliography/chicago-author-date-with-note.csl @@ -29,7 +29,7 @@ Brenton M. Wiernik - + The author-date variant of the Chicago style 2024-04-09T10:33:14+00:00 @@ -638,35 +638,17 @@ - - - - - - - - - - - - - - - - - - - - - - - + - - - + - + + + + + + + diff --git a/bibliography/chicago-numeric-with-note.csl b/bibliography/chicago-numeric-with-note.csl new file mode 100644 index 0000000..4e40149 --- /dev/null +++ b/bibliography/chicago-numeric-with-note.csl @@ -0,0 +1,672 @@ + + diff --git a/chapters/01-01-getting-started.Rmd b/chapters/01-01-getting-started.Rmd index e069504..6b6af9c 100644 --- a/chapters/01-01-getting-started.Rmd +++ b/chapters/01-01-getting-started.Rmd @@ -33,11 +33,11 @@ The following chapters in this unit will demonstrate how to use `R` to access, p ### Spatial Geometry -The _spatial geometry_ of a geospatial dataset is an important consideration in data analysis pipelines. +The _spatial geometry_ of a geospatial dataset is an important consideration in data analysis pipelines. There are three main spatial geometry types: point, line, and area (i.e., polygon or grid). _Points_ are represented by geographic coordinates (latitude and longitude pairs), _lines_ by a series of connected points, and _polygons_ by a series of connected points that completely enclose and define an area. In contrast to polygons, which can define irregular or non-uniform areas, _grids_ define regular and uniform areas (e.g., such that each grid cell has the same area). 
Point, line, and polygon data is referred to as _vector_ data, and grid data is referred to as _raster_ data. For detailed descriptions of vector data, raster data, and the differences between them, respectively, see [@r-sf-1], [@raster-data], and [@geocomputation-2]. -There are three main spatial geometry types: point, line, and area (i.e., polygon or grid). _Points_ are represented by geographic coordinates (latitude and longitude pairs), _lines_ by a series of connected points, and _polygons_ by a series of connected points that completely enclose and define an area. In contrast to polygons, which can define irregular or non-uniform areas, _grids_ define regular and uniform areas (e.g., such that each gridcell has the same area). Point, line, and polygon data is referred to as _vector_ data, and grid data is referred to as _raster_ data. For detailed descriptions of vector data, raster data, and the differences between them, respectively, see @r-sf-1, @raster-data, and @geocomputation-2. +The following table illustrates common examples of each spatial geometry type used in environmental health applications. -The following table illustrates common examples of each spatial geometry type used in environmental health applications: +
Spatial Geometry Types
|Type |Illustration |Examples |Tutorials | |------|-------------|----------------------------------------------|----------| @@ -50,7 +50,7 @@ The tutorials linked in the table above demonstrate exploratory analyses with ea ### Coordinate Reference Systems and Projections {#getting-started-spatial-crs} -Coordinate reference systems (CRS) are important for spatial analyses as they define how spatial data align with the Earth's surface [@geocomputation-24]. Transforming (projecting) the data to a different CRS may be necessary when combining multiple datasets or creating visuals for particular areas of interest. It is important to note that transforming spatial data can cause distortions in its area, direction, distance, or shape [@geocomputation-24]. The direction and magnitude of these distortions vary depending on the chosen CRS, area of interest, and type of data [@article_steinwand1995reprojection]. For guidance on selecting an appropriate CRS based on the data, area of interest, and analysis goals, see [_Choose the Right Projection_](https://learn.arcgis.com/en/projects/choose-the-right-projection/.) [@arcgis-crs] and [_Map Projections_](https://pubs.usgs.gov/gip/70047422/report.pdf) [@usgs-projections]. +Coordinate reference systems (CRS) are important for spatial analyses as they define how spatial data align with the Earth's surface [@geocomputation-24]. Transforming (projecting) the data to a different CRS may be necessary when combining multiple datasets or creating visuals for particular areas of interest. It is important to note that transforming spatial data can cause distortions in its area, direction, distance, or shape [@geocomputation-24]. The direction and magnitude of these distortions vary depending on the chosen CRS, area of interest, and type of data [@article_steinwand1995reprojection]. For guidance on selecting an appropriate CRS based on the data, area of interest, and analysis goals, see [@arcgis-crs; @usgs-projections]. 
## Datasets @@ -69,7 +69,7 @@ The tutorials in this unit demonstrate the use of the following `R` packages: - **Spatial Analysis:** [`sf`](https://cran.r-project.org/web/packages/sf/sf.pdf) [@r-sf-1; @r-sf-2], [`terra`](https://cran.r-project.org/web/packages/terra/terra.pdf) [@r-terra], [`tidyterra`]((https://cran.r-project.org/web/packages/tidyterra/tidyterra.pdf)) [@r-tidyterra] -- **Data Visualization:** [`ggplot2`](https://cran.r-project.org/web/packages/ggplot2/ggplot2.pdf), [@r-ggplot2], [`ggpubr`](https://cran.r-project.org/web/packages/ggpubr/ggpubr.pdf) [@r-ggpubr] +- **Data Visualization:** [`ggplot2`](https://cran.r-project.org/web/packages/ggplot2/ggplot2.pdf) [@r-ggplot2], [`ggpubr`](https://cran.r-project.org/web/packages/ggpubr/ggpubr.pdf) [@r-ggpubr] - **Data Processing:** [`dplyr`](https://cran.r-project.org/web/packages/dplyr/dplyr.pdf) [@r-dplyr] diff --git a/chapters/01-02-spatial-point-data.Rmd b/chapters/01-02-spatial-point-data.Rmd index 1b6acae..769290b 100644 --- a/chapters/01-02-spatial-point-data.Rmd +++ b/chapters/01-02-spatial-point-data.Rmd @@ -104,7 +104,7 @@ str(pm, ### Subset -Checking the data's structure shows that `pm` is a very large data set. Each of the variables convey important information related to air pollution monitoring, but not all will be utilized in these exploratory analyses. +Checking the data structure shows that `pm` is a very large data set. Each of the variables conveys important information related to air pollution monitoring, but not all will be utilized in these exploratory analyses. The data set can be reduced to include only the variables of interest with the `subset()` function. The `select =` argument indicates which variables to be retained in the new data set. @@ -121,7 +121,7 @@ pm <- subset(pm, select = c( )) ``` -Re-running `str(pm)` after subsetting the data set shows that all all observations (n = 590208) of the variables of interest (n = 8) have been retained. 
+Re-running `str(pm)` after subsetting the data set shows that all observations (n = 590,208) of the variables of interest (n = 8) have been retained. ```{r, eval = FALSE} str(pm, @@ -271,7 +271,7 @@ Each unique monitor identification code should be matched with a unique monitor length(unique(pm_sf$Monitor.ID)) == length(unique(pm_sf$geometry)) ``` -Now that each monitor location as a unique identification code, we can calculate the mean PM~2.5~ concentration measured at each monitoring location. Functions and syntax from the `dplyr` package will be used to do this. For more on the `dplyr` package, please see [Introduction to dplyr](https://dplyr.tidyverse.org/articles/dplyr.html). +Now that each monitor location has a unique identification code, we can calculate the mean PM~2.5~ concentration measured at each monitoring location. Functions and syntax from the `dplyr` package will be used to do this. For more on the `dplyr` package, please see [Introduction to dplyr](https://dplyr.tidyverse.org/articles/dplyr.html). The `group_by(Monitor.ID, )` argument specifies that an annual mean should be calculated for each unique `Monitor.ID`. Including `State.Name` in this argument retains the column in the new `pm_mean` data set, but does not influence the calculation of the annual mean. diff --git a/chapters/01-03-spatial-polygon-data.Rmd b/chapters/01-03-spatial-polygon-data.Rmd index 8c0dc4d..580c687 100644 --- a/chapters/01-03-spatial-polygon-data.Rmd +++ b/chapters/01-03-spatial-polygon-data.Rmd @@ -186,7 +186,7 @@ smoke_sf <- st_transform( With the data prepared, plot the wildfire smoke plume polygons with `ggplot2::ggplot()`. -Now that the parameters of interest and coordinate reference system have been prepared, create a plot with `ggplot2::ggplot()`. Identifying the data set to be plotted within the `geom_sf()` arguement informs the function that the data is an `sf` object. 
+Now that the parameters of interest and coordinate reference system have been prepared, create a plot with `ggplot2::ggplot()`. Identifying the data set to be plotted within the `geom_sf()` argument informs the function that the data is an `sf` object. ```{r, eval = FALSE} ggplot() + diff --git a/chapters/03-01-NASA_EarthData_account.Rmd b/chapters/03-01-NASA_EarthData_account.Rmd index 2246f14..ef8176b 100644 --- a/chapters/03-01-NASA_EarthData_account.Rmd +++ b/chapters/03-01-NASA_EarthData_account.Rmd @@ -49,19 +49,25 @@ Users will: ## NASA EarthData Account -### Register or log in +### Register or Log In Visit [https://urs.earthdata.nasa.gov/](http://urs.earthdata.nasa.gov) to register for or log into a NASA EarthData account. -![NASA EarthData Account Landing Page](./images/nasa_earthdata/NASA_EarthData_login.png) +::: {.figure} + +
NASA EarthData Account Landing Page
+::: -### Approved applications +### Approved Applications After creating an account, navigate to "My Profile"(), and then to "Applications \> Authorized Apps". This "Authorized Apps" page specifies which NASA EarthData applications can use your login credentials. Authorize the applications from which you will be downloading data. -![NASA EarthData Approved Applications](./images/nasa_earthdata/NASA_EarthData_applications.png) +::: {.figure} + +
NASA EarthData Approved Applications
+::: -### Prerequisite files +## Generate Prerequisite Files Downloading password-protected data from a URL requires user credentials. Without prerequisite files containing user credentials, the data will not be downloaded correctly. @@ -71,6 +77,8 @@ Without the prerequisite files the download step run without error, but trying t To demonstrate, try to download population density data [@sedac-gpwv4] from NASA's Socioeconomic Data and Applications Center (SEDAC) archive center. +
NASA SEDAC Population Density Data Characteristics
+ | | | |-----------------|-----------------------------------------------------------| | **Metric** | Population Density | @@ -113,7 +121,7 @@ To download the password protected data with command line commands, we must gene The following steps return errors for Windows system users. File generation on Windows is currently in development. ::: -#### `.netrc` {-} +### `.netrc` The following commands create the `.netrc` file, which contains your NASA EarthData Account credentials. @@ -169,7 +177,7 @@ paste0( ) ``` -#### `.urs_cookies` {-} +### `.urs_cookies` The following commands create the `.urs_cookies` file. @@ -199,7 +207,7 @@ file.exists(".urs_cookies") TRUE ``` -#### `.dodsrc` {-} +### `.dodsrc` The following commands create the `.dodsrc` file. @@ -270,7 +278,9 @@ if (.Platform$OS.type == "windows") { Enter these commands, as well as your username, password, and home directory, without error. Even a single misplaced character can disrupt the verification of your EarthData credentials. ::: -With the prerequisite files generated, try to download the SEDAC population data again. +## Download Data + +With the prerequisite files generated, try to download the SEDAC population density data again. ::: note Be sure to authorize the "SEDAC" applications at "My Profile"() under "Applications \> Authorized Apps" before running the following command. diff --git a/chapters/04-01-link-to-census.Rmd b/chapters/04-01-link-to-census.Rmd index c88cfda..556f7bf 100644 --- a/chapters/04-01-link-to-census.Rmd +++ b/chapters/04-01-link-to-census.Rmd @@ -29,7 +29,9 @@ Linking geocoded addresses to US Census geographic units is a common step in env The Census geoIDs can then serve as the basis for linking additional data to each geocoded address. Many types of data with importance for environmental health applications are available by Census geoID. Specifically, the Census collects and provides data by Census geoID for various social determinants of health (SDOH). 
Such Census SDOH data describe poverty, race/ethnicity, language, housing, and other socioeconomic characteristics. Increasingly, other data providers (e.g., other government agencies, research institutions, community science groups) are making their data available by Census geoID to help facilitate linkages with SDOH data. Such data cover various environment, climate, health, and built environment characteristics. -Example environmental health data sources readily available by Census geoID from US federal agencies include the following: +The following table lists example environmental health data sources readily available by Census geoID from US federal agencies. + +
Example Environmental Health Data Sources
| Data source | Geographic units | Example topics | |-----------------------|-------------------------|------------------------| @@ -50,25 +52,26 @@ The variable shape and spatial scale of Census geographic units is in contrast w [ZIP Code Tabulation Areas (ZCTAs)](https://www.census.gov/programs-surveys/geography/guidance/geo-areas/zctas.html), which represent the geographic areas used by the US Postal Service for ZIP codes, are another common boundary used in environmental health workflows. ZCTAs have no spatial relationship with block groups, tracts, counties, or states: that is, ZCTAs can cross or overlap those other geographic boundaries. Like other Census geographic boundaries, ZCTAs vary in spatial resolution and shape across the US. The spatial scale of ZCTAs is, on average, finer than counties but coarser than tracts. The following figure illustrates the variable spatial scale and shape of each type of geographic boundary: - -
- -![Census geographic units in Durham County, North Carolina, in 2010](images/link_to_census/geography_durham_co.png) -
+::: {.figure} + +
Census Geographic Units in Durham County, North Carolina, in 2010
+::: The Census assigns a unique identifying code, or [geoID](https://www.census.gov/programs-surveys/geography/guidance/geo-identifiers.html), to each geographic unit. This Census geoID is also referred to as a FIPS (Federal Information Processing Series) code. -The following table describes common types of Census geographic units and the structure of their geoIDs: +The following table describes common types of Census geographic units and the structure of their geoIDs. + +
Common Census Geographic Units in the United States
-| Geographic Unit | Total Units[^09-tutorial-link-point-to-census-geoid-1] | GeoID Desciption and Structure | Example Unit GeoID | Example Unit Name | +| Geographic Unit | Total Units[^09-tutorial-link-point-to-census-geoid-1] | GeoID Description and Structure | Example Unit GeoID | Example Unit Name | |---------------|---------------|---------------|---------------|---------------| | State | 50 | 2-digit state (S) code = SS | 09 | Connecticut (CT) | | County | 3143 | 5-digit code = 2-digit state (S) code + 3-digit county (C) code = SSCCC | 09007 | Middlesex County, CT | | Zip Code Tabulation Area (ZCTA) | 33,642 | 5-digit ZCTA (Z) code = ZZZZZ | 06480 | ZCTA for Portland, CT | | Tract | 84,414 | 11-digit code = 2-digit state (S) code + 3 digit county (C) code + 6-digit tract (T) code = SSCCCTTTTTT | 09007560100 | Tract 560100 in Middlesex County, CT | | Block group | 239,780 | 12-digit code = 2-digit state (S) code + 3 digit county (C) code + 6-digit tract (T) code + 1-digit block group (G) code = SSCCCTTTTTTG | 090075601001 | Block group 1 in Tract 560100 in Middlesex County, CT | -| Block | 8,132,968 | 15-digit code = 2-digit state (S) code + 3 digit county (C) code + 6-digit tract (T) code + 4-digit block (B) code (which contains the block group code as its first digit) = SSCCCTTTTTTGBBB | 090075601001004 | Block 1004 in Tract 560100 in Middlesex County, CT | +| Block | 8,132,968 | 15-digit code = 2-digit state (S) code + 3 digit county (C) code + 6-digit tract (T) code + 4-digit block (B) code (which contains the block group code as its first digit) = SSCCCTTTTTTBBBB | 090075601001004 | Block 1004 in Tract 560100 in Middlesex County, CT | [^09-tutorial-link-point-to-census-geoid-1]: Total number of geographic units in the US in 2020 (Source: [Census Bureau](https://www.census.gov/geographies/reference-files/time-series/geo/tallies.html)) @@ -274,10 +277,10 @@ nc_tracts_2010_map <- tmap::tm_shape(nc_tracts_2010_sf) + print(nc_tracts_2010_map) ``` -
- -![Census tracts in North Carolina in 2010](images/link_to_census/nc_tracts_2010_map.png) -
+::: {.figure} + +
Census Tracts in North Carolina in 2010
+::: Other [types and vintages of Census geographic boundaries](https://github.com/walkerke/tigris) are available through `tigris` and the related `tidycensus` package [@r-tidycensus]. In most cases, these boundaries are available for recent years (i.e., 1990 to present) and are accessed by state (i.e., users can download geographic boundaries for one state at a time, in separate files). @@ -308,11 +311,10 @@ linkage_map <- tmap::tm_shape(nc_tracts_2010_sf) + print(linkage_map) ``` -
- -![Census tracts in North Carolina in 2010 (grey) with sample geocoded addresses (red)](images/link_to_census/linkage_map.png) - -
+::: {.figure} + +
Census Tracts in North Carolina in 2010 (Grey) with Sample Geocoded Addresses (Red)
+::: Now that the geocoded addresses and Census tracts are in the same CRS, we can link each geocoded address to the Census tract that contains it using a spatial join. The following code produces a table of geocoded addresses linked to Census tract geoIDs: @@ -352,11 +354,10 @@ The fourth step is to link the AHRQ SDOH data to each geocoded address based on To start, we'll need to prepare the AHRQ SDOH data for Census tracts. This data is available to download as an Excel (XLSX) spreadsheet on the [AHRQ SDOH website](https://www.ahrq.gov/sdoh/data-analytics/sdoh-data.html) as shown in this screenshot: -
- -![Screenshot of AHRQ SDOH Website (September 13, 2023)](images/link_to_census/screenshot_sdoh_table.png) - -
+::: {.figure} + +
Screenshot of AHRQ SDOH Website (September 13, 2023)
+::: Before linking, we'll need to check the vintage of Census tracts used in the AHRQ SDOH data. To do this, we can review the AHRQ SDOH [Data Source Documentation](https://www.ahrq.gov/sites/default/files/wysiwyg/sdoh/SDOH-Data-Sources-Documentation-v1-Final.pdf) (accessed through the link shown in the website screenshot above). There we find the following information: @@ -366,13 +367,15 @@ Based on this information, we can use the geoIDs for year-2010 vintage Census tr For this example, we will link the AHRQ SDOH data for Census tracts in 2010 to our sample geocoded addresses on the basis of the year-2010 Census tract geoID. The SDOH AHRQ data for Census tracts in 2010 is provided as an Excel file. Because Excel files can contain multiple sheets, we'll need to first download and open the Excel file to understand which sheet(s) to read into `R`: -
- -![Screenshot of first sheet in Excel file (September 14, 2023)](images/link_to_census/screenshot_sdoh_xls_1.png) - -![Screenshot of second sheet in Excel file (September 14, 2023)](images/link_to_census/screenshot_sdoh_xls_2.png) +::: {.figure} + +
Screenshot of First Sheet in Excel File (September 14, 2023)
+::: -
+::: {.figure} + +
Screenshot of Second Sheet in Excel File (September 14, 2023)
+::: We find that the Excel file has two sheets: `Layout` and `Data`. The `Layout` sheet contains the data dictionary. The `Data` sheet contains \>300 columns of SDOH data by geoID for the \>70,000 Census tracts in the US in 2010. diff --git a/chapters/04-02-fhir-pit.Rmd b/chapters/04-02-fhir-pit.Rmd index bc7e414..a14f3f2 100644 --- a/chapters/04-02-fhir-pit.Rmd +++ b/chapters/04-02-fhir-pit.Rmd @@ -1,12 +1,10 @@ # A FHIR PIT Tutorial {#chapter-fhir-pit} -[![Profile-CMP](images/user_profiles/profilecmp.svg)](#profilecmp) -[![Profile-CRE](images/user_profiles/profilecre.svg)](#profilecre) -[![Profile-CDM](images/user_profiles/profilecdm.svg)](#profilecdm) +[![Profile-CMP](images/user_profiles/profilecmp.svg)](#profilecmp) [![Profile-CRE](images/user_profiles/profilecre.svg)](#profilecre) [![Profile-CDM](images/user_profiles/profilecdm.svg)](#profilecdm) [![Profile-GEO](images/user_profiles/profilegeo.svg)](#profilegeo) [![Profile-SBS](images/user_profiles/profilesbs.svg)](#profilesbs) ### FHIR PIT: HL7 Fast Healthcare Interoperability Resources Patient data Integration Tool: A Tutorial {.unnumbered} -**Date Modified**: April 30, 2024 +**Date Modified**: July 8, 2024 **Authors**: Juan Garcia [![author-jc](images/orcid.png){width="10"}](https://orcid.org/0009-0003-6503-2986), Kara Fecho [![author-kf](images/orcid.png){width="10"}](https://orcid.org/0000-0002-6704-9306), Hong Yi [![author-hy](images/orcid.png){width="10"}](https://orcid.org/0000-0002-5699-1259) @@ -26,11 +24,9 @@ This tutorial provides example code in `R`: The patient dataset used in this tutorial is synthetic. When using actual patient or participant datasets containing Protected Health Information (PHI), one must run FHIR PIT within a secure enclave and abide by all federal and institutional regulations. -Note that the FHIR PIT application may be memory-intensive, depending on the size of the input datasets. 
For the sample input data, this tutorial requires approximately 4 GB RAM to run successfully. - ### Motivation -Environmental exposures are increasingly recognized as important to consider when conducting human subjects research. Unfortunately, associating environmental exposures data with subject-level data is challenging due to the complexity of the data and the varied spatiotemporal resolutions. FHIR PIT is an open-source tool to link electronic health record (EHR) data in FHIR format with environmental exposures data derived from public sources. +Environmental exposures are increasingly recognized as important to consider when conducting human subjects research. Unfortunately, associating environmental exposures data with subject-level data is challenging due to the complexity of the data and the varied spatiotemporal resolutions. FHIR PIT is an open-source tool to link electronic health record (EHR) data in FHIR format with environmental exposures data derived from public sources. See Section [Considerations](#fhir-pit-considerations) for tools to support other common data models (CDMs). ### Background @@ -281,16 +277,18 @@ ggplot(icees2dei, aes(y = AvgDailyPM2.5Exposure, x = MaxDailyOzoneExposure)) + geom_smooth(method = "lm", se = FALSE) ``` - + -## Considerations +## Considerations {#fhir-pit-considerations} -FHIR PIT must be run within a secure enclave when working with real patient datasets, and all federal and insititutional regulations surrounding patient privacy and data security must be met. +FHIR PIT must be run within a secure enclave when working with real patient datasets, and all federal and institutional regulations surrounding patient privacy and data security must be met. FHIR PIT performs linkages between patient datasets and environmental exposures datasets using geocodes (i.e., patient primary residence), study period, and healthcare visit dates. 
FHIR PIT currently assumes a patient or subject resides in a single geolocation across a defined study period. We plan to adapt FHIR PIT to address mobility by, for example, assessing school-aged children's home versus public school exposures. -FHIR PIT can be memory-intensive when working with large input datasets. +Note that the FHIR PIT application may be memory-intensive, depending on the size of the input datasets. For the sample input data, this tutorial requires approximately 4 GB RAM to run successfully, but the memory requirements may be greater with large input datasets. + +Finally, FHIR PIT, by name and function, ingests FHIR files as the preferred CDM. For users who are working with the PCORnet CDM, we offer two tools to map PCORnet to FHIR: pcornet-to-fhir (https://github.com/RENCI/tx-pcornet-to-fhir) and CAMP FHIR (https://github.com/NCTraCSIDSci/camp-fhir). CAMP FHIR additionally maps i2b2 and OMOP to FHIR. Both tools are openly available. ## Concluding Remarks -This tutorial demonstrates how to execute FHIR PIT and analyze its output. The patient dataset used in this tutorial is synthetic and intended for demonstration purposes only. The environmental datasets used in this tutorial are randomly sampled from much larger datasets derived from public sources. Users may substititute the synthetic patient dataset and sample exposures datasets with their own datasets. To add your own data, please update the 'data/input' directory. For assistance with FHIR PIT or access to our environmental exposure datasets, please contact [fhir-pit\@renci.org](mailto:fhir-pit@renci.org){.email}. +This tutorial demonstrates how to execute FHIR PIT and analyze its output. The patient dataset used in this tutorial is synthetic and intended for demonstration purposes only. The environmental datasets used in this tutorial are randomly sampled from much larger datasets derived from public sources. 
Users may substitute the synthetic patient dataset and sample exposures datasets with their own datasets. To add your own data, please update the 'data/input' directory. For assistance with FHIR PIT or access to our environmental exposure datasets, please contact [fhir-pit\@renci.org](mailto:fhir-pit@renci.org){.email}. diff --git a/chapters/AA-01-user-profiles.Rmd b/chapters/AA-01-user-profiles.Rmd index 2547328..5794d9e 100644 --- a/chapters/AA-01-user-profiles.Rmd +++ b/chapters/AA-01-user-profiles.Rmd @@ -164,6 +164,7 @@ The CHORDS Toolkit chapters have been developed for various user profiles. This **Relevant Chapters:** - [NASA EarthData Download](#chapter-nasa-earthdata) +- [A FHIR PIT Tutorial](#chapter-fhir-pit) ::: ## Public Health Official ![](images/user_profiles/profilepho.svg) {#profilepho .unnumbered .unlisted} @@ -207,6 +208,7 @@ The CHORDS Toolkit chapters have been developed for various user profiles. This **Relevant Chapters:** - [Geospatial Data Foundations Unit](#unit-geospatial-foundations) +- [A FHIR PIT Tutorial](#chapter-fhir-pit) ::: ## Student ![](images/user_profiles/profilestu.svg) {#profilestu .unnumbered .unlisted} diff --git a/chapters/index.Rmd b/chapters/index.Rmd index 95e6679..6111c85 100644 --- a/chapters/index.Rmd +++ b/chapters/index.Rmd @@ -1,12 +1,10 @@ --- title: "The CHORDS Toolkit for Health and Geospatial Exposures Research" -subtitle: "Review Version Only -- Do Not Distribute" date: "Last Modified: `r format(Sys.time(), '%B %d, %Y')` (Version 1.0)" site: bookdown::bookdown_site documentclass: book bibliography: ["bibliography/book.bib", "bibliography/packages.bib"] -biblio-style: apalike -csl: "bibliography/chicago-author-date-with-note.csl" +csl: "bibliography/chicago-numeric-with-note.csl" link-citations: true link-bibliography: true description: The CHORDS Toolkit provides guides, tools, and example code to support climate change and human health research. 
@@ -18,19 +16,15 @@ output: bookdown::html_document2 --- -Researchers interested in studying the health impacts of climate change and related natural disasters need access to relevant, timely, and harmonized data on environmental exposures, social determinants of health variables, and health outcomes. However, such datasets are often developed for different purposes, reside in multiple locations, and require linkage. - -The Climate and Health Outcomes Research Data Systems (CHORDS) Toolkit seeks to aid researchers in accessing, processing, and integrating geospatial data-based exposures into their health research by providing guides, tools, and example code. +Researchers interested in studying the health impacts of climate change and climate-related disasters need access to relevant, timely, and harmonized data on environmental exposures, social determinants of health variables, and health outcomes. However, such datasets are often developed for different purposes, reside in multiple locations, and require linkage. The Climate and Health Outcomes Research Data Systems (CHORDS) Project seeks to connect researchers to environmental and health datasets with a toolkit that provides guides, tutorials, and example code to improve integration of geospatial data-based exposures and health data and records into their research. ## About CHORDS {-} The [Climate and Health Outcomes Research Data Systems (CHORDS)](https://www.niehs.nih.gov/research/programs/chords) program provides resources aimed at making it easier for researchers to study the effects of place-based environmental exposures on health outcomes. The CHORDS resources include a [web-based data catalog](https://chords.niehs.nih.gov), standardized data sets, and this toolkit. -
- -![](images/chords-art-logo.png) - -
+::: {.figure} + +::: ## About This Toolkit {-} @@ -58,4 +52,5 @@ _This is a BETA Release. Please let us know of any improvements we can make._ ### Funding {-} -This resource was supported by the National Institutes of Health (NIH) from the National Institute of Environmental Health Sciences (NIEHS) and NIH Office of Data Science Strategy (ODSS), and the Department of Health and Human Services (DHHS) Office of the Assistant Secretary for Planning and Evaluation (ASPE) Office of the Secretary’s Patient Centered Outcomes Research Trust Fund (OS-PCORTF). +This resource was supported by the Department of Health and Human Services (DHHS) Office of the Assistant Secretary for Planning and Evaluation (ASPE) Office of the Secretary’s Patient Centered Outcomes Research Trust Fund (OS-PCORTF) and by the National Institutes of Health (NIH) from the National Institute of Environmental Health Sciences (NIEHS) and the NIH Office of Data Science Strategy (ODSS). + diff --git a/style.css b/style.css index 9b1a006..f66dcfd 100644 --- a/style.css +++ b/style.css @@ -12,6 +12,20 @@ pre { word-wrap: normal; } +figure { + padding: 0; + margin: auto; +} + +figcaption { + color: black; + font-style: italic; + font-weight: 500; + padding: 0 5px 20px; + margin: 0; + text-align: center; +} + pre code { white-space: inherit; } diff --git a/templates/chords-chapter-template.Rmd b/templates/chords-chapter-template.Rmd index 314f929..1cb810e 100644 --- a/templates/chords-chapter-template.Rmd +++ b/templates/chords-chapter-template.Rmd @@ -59,7 +59,7 @@ The following header items appear above the "Introduction" section: Please use any headings and sub-headings for the main part of the chapter as needed. -Short headings and sub-headings (with less than approximately 45 characters) are preferred for readability in the table of contents sidebar. +Short headings and first-level sub-headings (with less than approximately 45 characters) are preferred for readability in the table of contents sidebar. 
### Code Blocks @@ -87,25 +87,32 @@ This displays an important note with a light red background and siren icon. You can add image files to the "images" folder and then add them to your chapter with a caption like this: -
- -![This is a caption for the CHORDS logo.](images/chords-art-logo.png) +::: {.figure} + +
This is a Caption for the CHORDS Logo
+::: -
+You can add a table with a caption like this: -You can add a table like this: +
This is a Caption for an Example Table
|A |B |C | |-------------|--------------|----------------| |1 |Test |Example | |2 |Test |Example | -You can add a list like this: +You can add an itemized list like this: - Item A - Item B - Item C +You can add a numbered list like this: + +1. Item 1 +2. Item 2 +3. Item 3 + ### Citations Please include citations to datasets, packages, and other resources used in the chapter. To add a citation: @@ -118,7 +125,7 @@ A reference list will automatically be added to the end of the chapter and the e ## Concluding Remarks -Chapters end with a section titled "Concluding Remarks". This can be short. +Chapters can end with a section titled "Concluding Remarks". This can be short. ## Additional Resources