diff --git a/DESCRIPTION b/DESCRIPTION index f88196a..1dcc9c9 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,61 +1,63 @@ -Package: lterdatasampler -Title: Educational dataset examples from the Long Term Ecological Research program -Version: 0.1.0 -Date: 2020-08-01 -Authors@R: c( - person(given = "Allison", - family = "Horst", - role = c("aut", "cre"), - email = "ahorst@ucsb.edu", - comment = c(ORCID = "0000-0002-6047-5564")), - person(given = "Julien", - family = "Brun", - role = c("aut"), - email = "brun@nceas.ucsb.edu", - comment = c(ORCID = "0000-0002-7751-6238")), - person(given = "Sam", - family = "Guo", - role = c("ctb"), - comment = c("https://github.com/TokyoExpress")), - person(given = "Adhitya", - family = "Logan", - role = c("ctb"), - comment = c("https://github.com/adhil0")), - person(given = "Lian", - family = "Ran", - role = c("ctb"), - comment = c("https://github.com/liaaaaran")), - person(given = "Sophia", - family = "Sternberg", - role = c("ctb"), - comment = c("https://github.com/sophiasternberg")), - person(given = "Karen", - family = "Zhao", - role = c("ctb"), - comment = c("https://github.com/karenezhao")) - ) -Description: Selected datasets, one from each LTER site. -License: CC0 -Encoding: UTF-8 -LazyData: true -Roxygen: list(markdown = TRUE) -RoxygenNote: 7.1.2 -Depends: - R (>= 2.10) -Suggests: - knitr, - rmarkdown, - janitor, - usethis, - metajam, - tidyverse, - here, - broom, - sf, - ggmap, - leaflet, - lubridate, - patchwork -URL: https://github.com/lter/lterdatasampler -BugReports: https://github.com/lter/lterdatasampler/issues -VignetteBuilder: knitr +Package: lterdatasampler +Title: Educational dataset examples from the Long Term Ecological Research program +Version: 0.1.0 +Date: 2020-08-01 +Authors@R: c( + person(given = "Allison", + family = "Horst", + role = c("aut", "cre"), + email = "ahorst@ucsb.edu", + comment = c(ORCID = "0000-0002-6047-5564")), + person(given = "Julien", + family = "Brun", + role = c("aut"), + email = "brun@nceas.ucsb.edu", + comment = c(ORCID = "0000-0002-7751-6238")), + person(given = "Sam", + family = "Guo", + role = c("ctb"), + comment = c("https://github.com/TokyoExpress")), + person(given = "Adhitya", + family = "Logan", + role = c("ctb"), + comment = c("https://github.com/adhil0")), + person(given = "Lian", + family = "Ran", + role = c("ctb"), + comment = c("https://github.com/liaaaaran")), + person(given = "Sophia", + family = "Sternberg", + role = c("ctb"), + comment = c("https://github.com/sophiasternberg")), + person(given = "Karen", + family = "Zhao", + role = c("ctb"), + comment = c("https://github.com/karenezhao")) + ) +Description: Selected datasets, one from each LTER site. +License: CC0 +Encoding: UTF-8 +LazyData: true +Roxygen: list(markdown = TRUE) +RoxygenNote: 7.1.2 +Depends: + R (>= 2.10) +Suggests: + knitr, + rmarkdown, + janitor, + usethis, + metajam, + tidyverse, + here, + broom, + sf, + ggmap, + leaflet, + lubridate, + patchwork, + gstat, + stars +URL: https://github.com/lter/lterdatasampler +BugReports: https://github.com/lter/lterdatasampler/issues +VignetteBuilder: knitr diff --git a/R/cap_birds_doc.R b/R/cap_birds_doc.R new file mode 100644 index 0000000..57c9b4b --- /dev/null +++ b/R/cap_birds_doc.R @@ -0,0 +1,25 @@ +#' Bird Community Observations (2000 - 2019), Central Arizona - Phoenix LTER +#' +#' Bird count data collected in different seasons using a standardized point-count protocol. In 2000, sites were visited twice. In 2001 each site was visited in each season by three independent observers. In 2005, sites were visited in three seasons, and from 2006 - 2019, only in spring and winter. +#' +#' @format A tibble with 172,175 rows and 14 variables +#' \describe{ +#' \item{survey_id}{a number denoting the unique identity of the bird survey} +#' \item{site_code}{a character denoting the identity of the survey location} +#' \item{survey_date}{a date denoting the date of observation} +#' \item{time_start}{a time denoting the start time of the observation period} +#' \item{time_end}{a time denoting the end time of the observation period} +#' \item{common_name}{a character denoting the common name of the species observed} +#' \item{distance}{a character denoting the distance of the observed birds from the observer in meters} +#' \item{bird_count}{a number denoting the count of birds observed during the survey period} +#' \item{seen}{a boolean denoting whether the bird was seen by the observer} +#' \item{heard}{a boolean denoting whether the bird was heard by the observer} +#' \item{direction}{a character denoting the direction of the bird from the observation point} +#' \item{location_type}{a character denoting the physical or research context of the site} +#' \item{lat}{a number denoting the latitude of the survey site in degrees} +#' \item{long}{a number denoting the longitude of the survey site in degrees} +#' } +#' @source {Warren, P., S.B. Lerman, H. Bateman, M. Katti, and E. Shochat. 2022. Point-count bird censusing: long-term monitoring of bird abundance and diversity in central Arizona-Phoenix, ongoing since 2000 ver 19. Environmental Data Initiative.} +#' \url{https://doi.org/10.6073/pasta/4fca7c8a6cd56a6abed9834aca72e764} + +"cap_birds" diff --git a/data-raw/cap_birds_data.R b/data-raw/cap_birds_data.R new file mode 100644 index 0000000..51f8dd1 --- /dev/null +++ b/data-raw/cap_birds_data.R @@ -0,0 +1,79 @@ +#' --- +#' title: "Data Preparation" +#' --- +#' ### Download the raw data from EDI.org + +#+ download_data, eval=FALSE +library(metajam) +library(tidyverse) + +# Point-count bird censusing: long-term monitoring of bird abundance and diversity in central Arizona-Phoenix, ongoing since 2000 +# Main URL: https://doi.org/10.6073/pasta/4fca7c8a6cd56a6abed9834aca72e764 + +# Location Data +locations_url <- + "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-cap.46.19&entityid=86856afb2e6779f5acb261a2b04d1e13" + + +# Sighting data +cap_url <- + "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-cap.46.19&entityid=832065765ce5a9a8238a0b25cb549722" + +# Download the data packages with metajam +locations_download <- + download_d1_data(data_url = locations_url, path = tempdir()) + +cap_download <- + download_d1_data(data_url = cap_url, path = tempdir()) + +#' ### Data cleaning +#+ data sampling, eval=FALSE + +# Read in location data +locations_files <- + read_d1_files(locations_download) +locations_raw <- locations_files$data + +# Discard repeated site locations. Some sites were moved over the course of the study, but the changes in location were very small +locations <- locations_raw %>% + select( + -begin_date, + -begin_date_month, + -begin_date_year, + -end_date, + -end_date_month, + -end_date_year, + ) %>% + mutate( + location_type = recode( + location_type, + "ESCA" = "Ecological Survey of Central Arizona", + "NDV" = "North Desert Village", + "riparian" = "Riparian", + "SRBP" = "Salt River Biodiversity Project", + "PASS" = "Phoenix Area Social Survey", + "desert_fertilization" = "Desert Fertilization" + ) + ) %>% + group_by(site_code) %>% + slice_tail() %>% + ungroup() + +# Read in count data +cap_files <- read_d1_files(cap_download) +cap_birds_raw <- cap_files$data + +# Select needed variables, spell out abbreviations +cap_birds <- + cap_birds_raw %>% + mutate(survey_date = as.Date(survey_date)) %>% + select(-observer, -code, -observation_notes, -qccomment) %>% + mutate(distance = recode(distance, "FT" = "Flying Through")) + +# Combine location data with count data +cap_birds <- + cap_birds %>% full_join(locations, by = "site_code") + +#+ save data, include=FALSE, EVAL=FALSE +## Save sample file +usethis::use_data(cap_birds, overwrite = TRUE) diff --git a/data/cap_birds.rda b/data/cap_birds.rda new file mode 100644 index 0000000..051f87f Binary files /dev/null and b/data/cap_birds.rda differ diff --git a/man/cap_birds.Rd b/man/cap_birds.Rd new file mode 100644 index 0000000..3f599e6 --- /dev/null +++ b/man/cap_birds.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cap_birds_doc.R +\docType{data} +\name{cap_birds} +\alias{cap_birds} +\title{Bird Community Observations (2000 - 2019), Central Arizona - Phoenix LTER} +\format{ +A tibble with 172,175 rows and 14 variables +\describe{ +\item{survey_id}{a number denoting the unique identity of the bird survey} +\item{site_code}{a character denoting the identity of the survey location} +\item{survey_date}{a date denoting the date of observation} +\item{time_start}{a time denoting the start time of the observation period} +\item{time_end}{a time denoting the end time of the observation period} +\item{common_name}{a character denoting the common name of the species observed} +\item{distance}{a character denoting the distance of the observed birds from the observer in meters} +\item{bird_count}{a number denoting the count of birds observed during the survey period} +\item{seen}{a boolean denoting whether the bird was seen by the observer} +\item{heard}{a boolean denoting whether the bird was heard by the observer} +\item{direction}{a character denoting the direction of the bird from the observation point} +\item{location_type}{a character denoting the physical or research context of the site} +\item{lat}{a number denoting the latitude of the survey site in degrees} +\item{long}{a number denoting the longitude of the survey site in degrees} +} +} +\source{ +{Warren, P., S.B. Lerman, H. Bateman, M. Katti, and E. Shochat. 2022. Point-count bird censusing: long-term monitoring of bird abundance and diversity in central Arizona-Phoenix, ongoing since 2000 ver 19. Environmental Data Initiative.} +\url{https://doi.org/10.6073/pasta/4fca7c8a6cd56a6abed9834aca72e764} +} +\usage{ +cap_birds +} +\description{ +Bird count data collected in different seasons using a standardized point-count protocol. In 2000, sites were visited twice. In 2001 each site was visited in each season by three independent observers. In 2005, sites were visited in three seasons, and from 2006 - 2019, only in spring and winter. +} +\keyword{datasets} diff --git a/vignettes/cap_birds_vignette.Rmd b/vignettes/cap_birds_vignette.Rmd new file mode 100644 index 0000000..b6a1806 --- /dev/null +++ b/vignettes/cap_birds_vignette.Rmd @@ -0,0 +1,383 @@ +--- +title: "Central Arizona - Phoenix Bird Locations (CAP)" +subtitle: "Bird Locations from 2000 - 2019 at Central Arizona - Phoenix LTER" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{cap_birds_vignette} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + fig.width = 7, + fig.height = 4, + message = FALSE, + warning = FALSE +) +``` + +### Dataset sample used + +- `cap_birds` + + +# Introduction + +As urban environments continue to expand, researchers at the Central Arizona-Phoenix Long-Term Ecological Research (CAP LTER) site study the changing ecological landscape surrounding the Phoenix, Arizona metropolitan area. + +One project conducted at the CAP LTER monitors bird species as well as their geographic distribution. This data can be used to analyze relationships between native and non-native species and how urbanization is impacting them. + +# Data Exploration + +```{r setup} +library(lterdatasampler) +library(tidyverse) +library(ggmap) +library(sf) +library(stars) +library(gstat) +``` + + +```{r} +glimpse(cap_birds) +``` +Two species observed in the data that can help us explore this phenomenon are Phainopepla (_Phainopepla nitens_), a species native to Phoenix, and the Rock Pigeon (_Columba livia_), which are not native to the local environment. + +We can filter our dataset to only include these two species: + +```{r} +phainopepla <- cap_birds %>% + filter(common_name == "Phainopepla") + +rock_pigeon <- cap_birds %>% + filter(common_name == "Rock Pigeon") +``` + +We can now try to plot the locations of the observed birds. We can use the `sf` package to make our life easier when working with spatial data. The `nwt_pikas` [vignette](https://lter.github.io/lterdatasampler/articles/nwt_pikas_vignette.html#converting-to-simple-features) contains more details about working with `sf`. + +Incorporate sf with our data: + +```{r} +phainopepla <- phainopepla %>% + st_as_sf(coords = c("long", "lat")) + + +rock_pigeon <- rock_pigeon %>% + st_as_sf(coords = c("long", "lat")) +``` + +If we try to plot our data now, both plots will simply show the survey locations, without any information between the points: +```{r} +ggplot(data = phainopepla) + + geom_sf() + + labs( + title = "Locations of Phainopeplas", + subtitle = "CAP LTER", + x = "Longitude (Degrees)", + y = "Latitude (Degrees)" + ) + + theme_minimal() +``` + +```{r} +ggplot(data = rock_pigeon) + + geom_sf() + + labs( + title = "Locations of Rock Pigeons", + subtitle = "CAP LTER", + x = " Longitude (Degrees)", + y = "Latitude (Degrees)" + ) + + theme_minimal() +``` + + +# Kriging + +To estimate how many birds are present in locations between the points, we can use a spatial interpolation technique called kriging. As described by [Columbia Public Health](http://www.publichealth.columbia.edu/research/population-health-methods/kriging), kriging uses nearby sample points to estimate values at points where no data was collected. + +First let's manipulate our data so that we have our data locations and how many birds of each species were observed there: + +```{r} +ph_sum <- + phainopepla %>% filter(!is.na(bird_count)) %>% group_by(site_code) %>% summarise(count = sum(bird_count)) + +rock_sum <- + rock_pigeon %>% filter(!is.na(bird_count)) %>% group_by(site_code) %>% summarise(count = sum(bird_count)) + +glimpse(ph_sum) +glimpse(rock_sum) +``` + +There are many types of kriging. For this example, we can use ordinary kriging, which is one of the simpler types. + +Ordinary kriging assumes that the mean and variance of the values is constant, and that the data is normally distributed. + +## Data Transformation + +Examine how the data is distributed: + +```{r} +ggplot(data = ph_sum) + + geom_histogram(aes(x = count)) + + labs( + title = "Distribution of Phainopepla Counts", + subtitle = "CAP LTER", + x = "Number of Birds", + y = "Frequency" + ) + + theme_minimal() + +ggplot(data = rock_sum) + + geom_histogram(aes(x = count)) + + labs( + title = "Distribution of Rock Pigeon Counts", + subtitle = "CAP LTER", + x = "Number of Birds", + y = "Frequency" + ) + + theme_minimal() +``` + +The data for both species is skewed, so we can use a Box-Cox transformation to help us normalize it. + +Box-Cox Formula: + +$$y(\lambda) = \begin{cases} \frac{y^\lambda-1}{\lambda} & \lambda \neq 0\\ log(y) & \lambda = 0 \end{cases}$$ + +Derive lambda for both species: + +```{r fig.show='hide'} +ph_box <- MASS::boxcox(lm(ph_sum %>% pluck("count") ~ 1)) +ph_max_index <- ph_box %>% pluck("y") %>% which.max() +ph_lambda <- ph_box %>% pluck("x", ph_max_index) + +rock_box <- MASS::boxcox(lm(rock_sum %>% pluck("count") ~ 1)) +rock_max_index <- rock_box %>% pluck("y") %>% which.max() +rock_lambda <- rock_box %>% pluck("x", rock_max_index) +``` + +Transform data: + +```{r} +ph_sum <- + ph_sum %>% mutate(count = ((count ^ ph_lambda) - 1) / ph_lambda) + +rock_sum <- + rock_sum %>% mutate(count = ((count ^ rock_lambda) - 1) / rock_lambda) +``` + +Let's use QQ Plots to see if our transformed data is normally distributed: + +```{r} +ggplot(ph_sum, aes(sample = count)) + + stat_qq() + + stat_qq_line() + + labs( + title = "Phainopepla Normal Q-Q Plot", + subtitle = "CAP LTER", + x = "Theoretical Quantiles", + y = "Sample Quantiles" + ) + + theme_minimal() + +ggplot(rock_sum, aes(sample = count)) + + stat_qq() + + stat_qq_line() + + labs( + title = "Rock Pigeon Normal Q-Q Plot", + subtitle = "CAP LTER", + x = "Theoretical Quantiles", + y = "Sample Quantiles" + ) + + theme_minimal() +``` + +Since the points fall approximately along the line, we can assume our data is now normal. + +## Variogram + +Next, we must fit a variogram to our data. A variogram is a measure of the half mean-squared difference between our values and the distance between them, and will help us determine the weights of our kriging model. + +First we can generate our experimental variogram: + +```{r} +ph_vgm <- variogram(count ~ 1, data = ph_sum) + +rock_vgm <- variogram(count ~ 1, data = rock_sum) +``` + +Plot experimental variogram: + +```{r} +ggplot(data = ph_vgm) + + geom_point(aes(x = dist, y = gamma)) + + labs( + title = "Phainopepla Experimental Variogram", + subtitle = "CAP LTER", + x = "Distance", + y = "Semivariance" + ) + + theme_minimal() + +ggplot(data = rock_vgm) + + geom_point(aes(x = dist, y = gamma)) + + labs( + title = "Rock Pigeon Experimental Variogram", + subtitle = "CAP LTER", + x = "Distance", + y = "Semivariance" + ) + + theme_minimal() +``` + +Now we can fit our theoretical variogram so that we can predict values for places where we don't have data. There are preset functions that can be used for our model: + +```{r} +vgm() +``` + +You can use cross-validation to determine which of these models best fits your data, but we'll choose the spherical model for the Phainopepla data and the Matern model for the Rock Pigeon data: + +```{r} +ph_fit <- fit.variogram(ph_vgm, vgm("Sph")) + +rock_fit <- fit.variogram(rock_vgm, vgm("Mat")) +``` + +Plot experimental and theoretical variogram: + +```{r} +ph_dist <- ph_vgm %>% pull(dist) %>% max() +line <- variogramLine(ph_fit, maxdist = ph_dist) +ggplot(data = ph_vgm) + + geom_point(aes(x = dist, y = gamma)) + + geom_line(data = line, aes(x = dist, y = gamma)) + + labs( + title = "Experimental and Theoretical Variograms for Phainopepla", + subtitle = "CAP LTER", + x = "Distance", + y = "Semivariance" + ) + + theme_minimal() +``` + +```{r} +rock_dist <- rock_vgm %>% pull(dist) %>% max() +line <- variogramLine(rock_fit, maxdist = rock_dist) +ggplot(data = rock_vgm) + + geom_point(aes(x = dist, y = gamma)) + + geom_line(data = line, aes(x = dist, y = gamma)) + + labs( + title = "Experimental and Theoretical Variograms for Phainopepla", + subtitle = "CAP LTER", + x = "Distance", + y = "Semivariance" + ) + + theme_minimal() +``` + +Now that we have theoretical variograms, we can use it to help us in our kriging. + +## Kriging Calculation + +First, we need to create a grid so that we can map our predictions. + +```{r} +cap_grid <- + ph_sum %>% pull(geometry) %>% st_make_grid() %>% st_as_stars() +``` + +Now we can generate our predictions using our data, grid, and variogram. Use `data ~ 1` for ordinary kriging: + +```{r} +ph_krige <- + krige(count ~ 1, #ordinary kriging + ph_sum, + cap_grid, + ph_fit) + +ph_krige_df <- ph_krige %>% as.data.frame() + + +rock_krige <- krige(count ~ 1, #ordinary kriging + rock_sum, + cap_grid, + rock_fit) + +rock_krige_df <- rock_krige %>% as.data.frame() +``` + +## Plotting Results + +To get a better sense of how our data relates to the terrain of the area of observation, we can use the `ggmap` package to add an online map to our plots. See the [`nwt_pikas` vignette](https://lter.github.io/lterdatasampler/articles/nwt_pikas_vignette.html) for more guidance. + +Get online map: + +```{r} +phoenix <- + get_stamenmap( + c( + left = -112.68090, + bottom = 33.21870, + right = -111.57948, + top = 33.88142 + ), + maptype = "terrain", + zoom = 10 + ) +``` + +Combine with our data plot: +```{r} +ggmap(phoenix) + + geom_raster( + ph_krige_df, + mapping = aes(x = x, + y = y, + fill = var1.pred), + alpha = 0.7, + interpolate = FALSE + ) + + coord_cartesian() + + scale_fill_viridis_c() + + labs( + title = "Interpolated Locations of Phainopeplas", + subtitle = "CAP LTER", + x = "Longitude (Degrees)", + y = "Latitude (Degrees)" + ) +``` + +```{r} +ggmap(phoenix) + + geom_raster( + rock_krige_df, + mapping = aes(x = x, + y = y, + fill = var1.pred), + alpha = 0.7, + interpolate = FALSE + ) + + coord_cartesian() + + scale_fill_viridis_c() + + labs( + title = "Interpolated Locations of Rock Pigeons", + subtitle = "CAP LTER", + x = "Longitude (Degrees)", + y = "Latitude (Degrees)" + ) +``` + +Now can we see that the native Phainopeplas are more commonly sighted in rural environments, while the non-native Rock Pigeons are found in more urban settings. + +# Citation + +Warren, P., S.B. Lerman, H. Bateman, M. Katti, and E. Shochat. 2020. Point-count bird censusing: long-term monitoring of bird abundance and diversity in central Arizona-Phoenix, ongoing since 2000 ver 18. Environmental Data Initiative. https://doi.org/10.6073/pasta/4fca7c8a6cd56a6abed9834aca72e764 (Accessed 2022-03-17). + +# How we processed the raw data +`r knitr::spin_child(here::here("data-raw", "cap_birds_data.R"))`