From 0c2ea3e6e58ca81aeca12039e9b0a583d875ea5f Mon Sep 17 00:00:00 2001 From: David Blodgett Date: Wed, 16 Jun 2021 09:03:22 -0500 Subject: [PATCH] wells and gfv11 work --- .gitignore | 3 +- R/get_data.R | 83 ++++++++++++++++++++++++++++++++++++++++++--- R/make_poi_ld.R | 67 ++++++++++++++++++++++++++++++++++++ R/reference_units.R | 17 ++++------ runner.R | 42 ++++++++++++----------- 5 files changed, 176 insertions(+), 36 deletions(-) create mode 100644 R/make_poi_ld.R diff --git a/.gitignore b/.gitignore index 34ad631..8f219b9 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,5 @@ data/* .drake *.csv *.geojson -*.DS_Store \ No newline at end of file +*.DS_Store +out/* \ No newline at end of file diff --git a/R/get_data.R b/R/get_data.R index 6b402b0..b6f09fd 100644 --- a/R/get_data.R +++ b/R/get_data.R @@ -1,12 +1,87 @@ get_nwis_sites <- function() { - hu02 <- c(paste0("0", c(1:9)), paste0("1", c(0:9)), "20", "21") - sites <- do.call(rbind, lapply(hu02, function(x) { - importRDB1(paste0("https://waterservices.usgs.gov/nwis/site/?site_output=expanded&format=rdb&huc=", - x)) + st_code <- c(paste0("0", c(1:9)), as.character(c(10:98))) + + sites <- do.call(rbind, lapply(st_code, function(x) { + url <- paste0("https://waterservices.usgs.gov/nwis/site/?site_output=expanded&format=rdb&stateCd=", x) + + message(url) + + try(importRDB1(url)) })) + + sites +} + +get_nwis_wells <- function(sites) { + + gw_site <- filter(sites, grepl("^GW.*|^sb.*", site_tp_cd) & + !is.na(dec_long_va) & + !is.na(dec_lat_va)) %>% + select(dec_lat_va, dec_long_va, site_no, station_nm, site_no, + nat_aqfr_cd, aqfr_cd, aqfr_type_cd, + well_depth_va, hole_depth_va, alt_va, alt_datum_cd) %>% + group_by(site_no) %>% + arrange(nat_aqfr_cd) %>% # Ensures we select ones that have a national aquifer code + filter(n() == 1) %>% + ungroup() %>% + st_as_sf(coords = c("dec_long_va", "dec_lat_va"), crs = 4326) %>% + mutate(description = paste0("USGS NWIS Subsurface Site ", site_no, ": ", station_nm), + subjectOf = paste0("https://waterdata.usgs.gov/monitoring-location/", site_no), + # uri = paste0("https://geoconnex.us/ref/gages/", n()), + provider = "https://waterdata.usgs.gov", + provider_id = site_no, + national_aquifer = ifelse(!is.na(nat_aqfr_cd), + paste0("https://geoconnex.us/ref/nat_aq/", + nat_aqfr_cd), "")) %>% + select(name = station_nm, + description, + subjectOf, + provider, + provider_id, + national_aquifer) + } get_wbd_gdb <- function(wbd_dir) { nhdplusTools::download_wbd(outdir = wbd_dir, url = "https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/WBD/National/GDB/WBD_National_GDB.zip") +} + +get_gf_11_poi <- function() { + temp_gdb <- file.path(tempdir(), "GFv1.1.gdb.zip") + + sbtools::item_file_download("5e29d1a0e4b0a79317cf7f63", + names = "GFv1.1.gdb.zip", + destinations = temp_gdb) + + zip::unzip(temp_gdb, exdir = tempdir()) + + temp_gdb <- gsub(".zip", "", temp_gdb) + + list(POIs = sf::read_sf(temp_gdb, "POIs_v1_1"), TBto = sf::read_sf(temp_gdb, "TBtoGFv1_POIs")) +} + +get_nhdplus_crosswalk <- function() { + url <- "https://s3.amazonaws.com/edap-nhdplus/NHDPlusV21/Data/NationalData/NHDPlusV21_NationalData_V1_To_V2_Crosswalk_01.7z" + + zip_file <- "data/NHDPlusV21_NationalData_V1_To_V2_Crosswalk_01.7z" + + out_dir <- "data/v1_v2/" + + if(!dir.exists(out_dir)) { + httr::GET(url, config = httr::write_disk(zip_file, overwrite = TRUE)) + + system(paste0("7z -o", normalizePath(out_dir), " e ", normalizePath(zip_file)), intern = TRUE) + } + + normalizePath(file.path(out_dir, "NHDPlusV1Network_V2Network_Crosswalk.dbf")) +} + +get_v2_flowlines <- function() { + nhd_data <- "data/nhdp/NHDPlusNationalData/NHDPlusV21_National_Seamless.gdb" + if(!dir.exists(nhd_data)) { + stop(paste("expect nhdp data available at \n", nhd_data)) + } + sf::read_sf(nhd_data, + "NHDFlowline_Network") } \ No newline at end of file diff --git a/R/make_poi_ld.R b/R/make_poi_ld.R new file mode 100644 index 0000000..3c10a06 --- /dev/null +++ b/R/make_poi_ld.R @@ -0,0 +1,67 @@ +index_pois <- function(POIs, v1_v2, flines) { + transboundary <- POIs$TBto + + POIs <- POIs$POIs + + # GF11 in a USGS namespace + uri <- paste0("https://geoconnex.us/usgs/nhm_gf11/", POIs$seg_id_nhm) + + lookup <- foreign::read.dbf(normalizePath(v1_v2), as.is = TRUE) + + POIs <- left_join(POIs, select(lookup, V2_ComID, V1_ComID, XWalkType), + by = c("NHDPlusID" = "V1_ComID")) + + NA_POIs <- filter(POIs, is.na(XWalkType)) + + NA_POI_index <- nhdplusTools::get_flowline_index(flines, + sf::st_zm(NA_POIs), + search_radius = 0.1) + + NA_POIs$id <- seq_len(nrow(NA_POIs)) + + NA_POIs <- left_join(NA_POIs, NA_POI_index, by = "id") + + NA_POIs <- select(NA_POIs, -id) + + not_1_1_POIs <- filter(POIs, !is.na(XWalkType) & XWalkType != "1-1") + + not_1_1_POIs_index <- nhdplusTools::get_flowline_index( + filter(flines, COMID %in% not_1_1_POIs$V2_ComID), + not_1_1_POIs, + search_radius = 0.1 + ) + + not_1_1_POIs <- bind_cols(not_1_1_POIs, select(not_1_1_POIs_index, -id)) + + ## Seg ID NHM is a sequence with some funny replacement near the border + mapped_POIs <- filter(POIs, !seg_id_nhm %in% c(NA_POIs$seg_id_nhm, + not_1_1_POIs$seg_id_nhm)) + + mapped_POIs_index <- nhdplusTools::get_flowline_index( + filter(flines, COMID %in% mapped_POIs$V2_ComID), + mapped_POIs, + search_radius = 0.1 + ) + + mapped_POIs <- bind_cols(mapped_POIs, select(mapped_POIs_index, -id)) + + + really_no_match <- filter(NA_POIs, is.na(COMID)) + + + bind_rows(mapped_POIs, not_1_1_POIs, mapped_POIs) +} + +make_poi_ld <- function(gf11_pois, nhdplus2_attributes) { + + nhdplus2_attributes <- select(nhdplus2_attributes, COMID, GNIS_ID, StreamOrde, TotDASqKM) + + gf11_pois <- left_join(gf11_pois, nhdplus2_attributes, by = c("V2_ComID" = "COMID")) + + # GNIS + # nhdplusv21 comid + # NWIS gage + + out <- select(gf11_pois, seg_id_nhm, GNIS_ID, GNIS_NAME, V2_ComID, V1_ComID = NHDPlusID, gage = Type_Gage, NHD_Unit, REACHCODE, REACH_meas, offset, StreamOrde, TotDASqKM) + +} \ No newline at end of file diff --git a/R/reference_units.R b/R/reference_units.R index 8a2012d..2b568bd 100644 --- a/R/reference_units.R +++ b/R/reference_units.R @@ -1,21 +1,16 @@ -get_hu <- function(wbd_gdb, hu_layer, id_attribute, gnis_base, pid_base, - out, landing_base, csv_out, description, creator = "dblodgett@usgs.gov") { - hu <- sf::read_sf(wbd_gdb, hu_layer) +get_hu <- function(wbd_gdb, hu_layer, id_attribute, pid_base, + out, landing_base, csv_out, description, + creator = "dblodgett@usgs.gov") { - hu <- rmapshaper::ms_simplify(hu, sys = TRUE) + hu <- sf::read_sf(wbd_gdb, hu_layer) - if(!is.na(hu$GNIS_ID)) { - hu$gnis_url <- paste0(gnis_base, - hu$GNIS_ID) - } else { - hu$gnis_url <- "" - } + hu <- rmapshaper::ms_simplify(hu) names(hu)[names(hu) == id_attribute] <- "temp_id" hu$uri <- paste0(pid_base, hu$temp_id) - hu <- dplyr::select(hu, uri, NAME, gnis_url, GNIS_ID, temp_id, LOADDATE) + hu <- dplyr::select(hu, uri, NAME, temp_id, LOADDATE) hu_level <- nchar(hu$temp_id[1]) diff --git a/runner.R b/runner.R index 91cc9d7..d1cb527 100644 --- a/runner.R +++ b/runner.R @@ -8,56 +8,58 @@ library(drake) sourced <- sapply(list.files("R", pattern = "*.R", full.names = TRUE), source) plan <- drake_plan(nwis_sites = get_nwis_sites(), + nwis_wells = get_nwis_wells(nwis_sites), + nwis_wells_out = sf::write_sf(nwis_wells, "out/nwis_wells.geojson"), + POIs = get_gf_11_poi(), + v1_v2 = get_nhdplus_crosswalk(), + nhdplusv2_flines = get_v2_flowlines(), + nhdplus2_attributes = sf::st_drop_geometry(nhdplusv2_flines), + gf11_pois = index_pois(POIs, v1_v2, flines = nhdplusv2_flines), huc = target( make_nwis_huc_redirects(nwis_sites, file_out("out/hydrologic-unit.csv"))), wbd_gdb = get_wbd_gdb("data/wbd/"), hu02 = get_hu(wbd_gdb = wbd_gdb, - hu_layer = "WBDHU2", + hu_layer = "WBDHU2", id_attribute = "HUC2", - gnis_base = "https://geonames.usgs.gov/apex/f?p=gnispq:3:::NO::P3_FID:", - pid_base = "https://geoconnex.us/ref/hu02/", - out = file_out("out/hu02.gpkg"), + pid_base = "https://geoconnex.us/ref/hu02/", + out = file_out("out/hu02.gpkg"), landing_base = "https://info.geoconnex.us/collections/hu02/items/", csv_out = file_out("out/hu02.csv"), description = "two digit hydrologic units reference", creator = "dblodgett@usgs.gov"), hu04 = get_hu(wbd_gdb = wbd_gdb, - hu_layer = "WBDHU4", + hu_layer = "WBDHU4", id_attribute = "HUC4", - gnis_base = "https://geonames.usgs.gov/apex/f?p=gnispq:3:::NO::P3_FID:", - pid_base = "https://geoconnex.us/ref/hu04/", - out = file_out("out/hu04.gpkg"), + pid_base = "https://geoconnex.us/ref/hu04/", + out = file_out("out/hu04.gpkg"), landing_base = "https://info.geoconnex.us/collections/hu04/items/", csv_out = file_out("out/hu04.csv"), description = "four digit hydrologic units reference", creator = "dblodgett@usgs.gov"), hu06 = get_hu(wbd_gdb = wbd_gdb, - hu_layer = "WBDHU6", + hu_layer = "WBDHU6", id_attribute = "HUC6", - gnis_base = "https://geonames.usgs.gov/apex/f?p=gnispq:3:::NO::P3_FID:", - pid_base = "https://geoconnex.us/ref/hu06/", - out = file_out("out/hu06.gpkg"), + pid_base = "https://geoconnex.us/ref/hu06/", + out = file_out("out/hu06.gpkg"), landing_base = "https://info.geoconnex.us/collections/hu06/items/", csv_out = file_out("out/hu06.csv"), description = "six digit hydrologic units reference", creator = "dblodgett@usgs.gov"), hu08 = get_hu(wbd_gdb = wbd_gdb, - hu_layer = "WBDHU8", + hu_layer = "WBDHU8", id_attribute = "HUC8", - gnis_base = "https://geonames.usgs.gov/apex/f?p=gnispq:3:::NO::P3_FID:", - pid_base = "https://geoconnex.us/ref/hu08/", - out = file_out("out/hu08.gpkg"), + pid_base = "https://geoconnex.us/ref/hu08/", + out = file_out("out/hu08.gpkg"), landing_base = "https://info.geoconnex.us/collections/hu08/items/", csv_out = file_out("out/hu08.csv"), description = "eight digit hydrologic units reference", creator = "dblodgett@usgs.gov"), hu10 = get_hu(wbd_gdb = wbd_gdb, - hu_layer = "WBDHU10", + hu_layer = "WBDHU10", id_attribute = "HUC10", - gnis_base = "https://geonames.usgs.gov/apex/f?p=gnispq:3:::NO::P3_FID:", - pid_base = "https://geoconnex.us/ref/hu10/", - out = file_out("out/hu10.gpkg"), + pid_base = "https://geoconnex.us/ref/hu10/", + out = file_out("out/hu10.gpkg"), landing_base = "https://info.geoconnex.us/collections/hu10/items/", csv_out = file_out("out/hu10.csv"), description = "ten digit hydrologic units reference",