Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Devel #10

Merged
merged 19 commits into from
May 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 3 additions & 6 deletions R/categorize_basins.R
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
# add basins to receiver locations
# M. Johnston
# Tue Jan 10 09:11:02 2023 America/Los_Angeles ------------------------------


library(sf)
library(dplyr)

map = read_sf("data/spatial/Basins.kml")
gis = readRDS("data_clean/allgis.rds")
deps = readRDS("data_clean/alldeps.rds")
d = readRDS("data_clean/alldets.rds")


pnts_sf <- st_as_sf(gis, coords = c('Longitude', 'Latitude'), crs = st_crs(map))

Expand All @@ -30,19 +31,15 @@ gis[gis$Location_name == "SJR LR", ]
gis[gis$Location_name == "SJR UR", ]

# add basin to deployments
deps = readRDS("data_clean/alldeps.rds")
deps$combo = paste0(deps$Latitude, deps$Longitude)


ans = merge(deps, pnts[ , c("Location_name", "combo", "Basin")],
all.x = TRUE, by = c("Location_name", "combo"))

write.csv(ans, "data_clean/alldeps_with_basin.csv", row.names = FALSE)

# add basin to detections
d = readRDS("data_clean/alldets.rds")
d = d[ , c("Receiver", "Location_name", "TagID", "DateTimePST", "Origin")]
ans = read.csv("data_clean/alldeps_with_basin.csv")

ans2 = merge(d, ans[ , c("Location_name", "Receiver", "Basin")],
all.x = TRUE)
98 changes: 47 additions & 51 deletions R/parse_deployments.R → R/clean_deployments.R
Original file line number Diff line number Diff line change
@@ -1,21 +1,11 @@
# Parse Deployments tables
# Parse Deployment tables
# M. Johnston
# Wed Sep 28 08:50:08 2022 America/Los_Angeles ------------------------------

library(RSQLite)
library(data.table)
library(telemetry)
library(lubridate)
source("R/overlap_funs.R")
data.dir = "~/DropboxCFS/NEW PROJECTS - EXTERNAL SHARE/WST_Synthesis/Data/"
if(FALSE){
# YOLO deployments original
sql_loc = file.path(data.dir, "ac_telemetry_database.sqlite") # yolo + BARD
con = dbConnect(RSQLite::SQLite(), sql_loc)
ydep = dbGetQuery(con, "SELECT * FROM deployments;")
dbDisconnect(con)
saveRDS(ydep, "data/ydep.rds")
}
data.dir = readRDS("data/data_dir_local.rds")

## Receiver bounds: Longitude -120.0, Latitude 37.2
## Detection bounds: "2010-08-17" - "2022-01-01"
Expand All @@ -36,11 +26,11 @@ cols_keep = c("Location_name",

# Detections
# Compare the receivers our fish were actually detected on to our deployments data
dd = readRDS("data/WST_detections.rds")
dd = readRDS("data/WST_detections.rds") # made in combine_detections.R


# PATH deployments
path = readRDS("data/bard_depsQ42022.rds") # made in qaqc_klimbley.R
path = readRDS("data/bard_depsQ42022.rds") # made in get_bard_deployments.R
path$Origin = "PATH"

new = c(
Expand Down Expand Up @@ -70,7 +60,7 @@ chk = path[outside, ]
path = path[!outside, ]

# load Yolo and Lodi
ydep = readRDS("data/ydep.rds")
ydep = readRDS(file.path(data.dir, "Yolo/ydep.rds")) # made in get_yolo_raw_data.R
ydep$Location_name = ydep$Station
ydep$Notes = paste(ydep$VRLNotes, ydep$DeploymentNotes)

Expand Down Expand Up @@ -103,24 +93,6 @@ comp = path[path$Receiver %in% unique(sjr$Receiver), ] # checked these w/ Laura
range(comp$End)
path = dplyr::anti_join(path, comp)

# end_times = readxl::read_excel(file.path(data.dir, "Lodi/OneDrive_1_6-13-2022/Full Receiver History_Updated Jun2017.xlsx"), sheet = 1)
#
# end_times$`Receiver Time @ Log Start (PST)` = force_tz(end_times$`Receiver Time @ Log Start (PST)`, tz = "Etc/GMT+8")
#
# end_times$StartDate = as.Date(end_times$`Receiver Time @ Log Start (PST)`)
# sjr$StartDate = as.Date(sjr$Start)
#
# # make a column to math on
# end_times$Rec_StartDate = paste(end_times$Serial_Number, end_times$StartDate)
# sjr$Rec_StartDate = paste(sjr$Receiver, sjr$StartDate)
#
# # for each index in x, find the first match in y
# ii = match(end_times$Rec_StartDate, sjr$Rec_StartDate) # has NAs, but need to keep it in the same order
# sjr$StartDate[na.omit(ii)] = end_times$StartDate[!is.na(ii)] # pattern for using match
#
# sjr$End[na.omit(ii)] = end_times$`Receiver Time @ Log Upload (PST)`[!is.na(ii)] # pattern for using match


yolo_sjr = c(unique(ydep$Receiver), unique(sjr$Receiver))

# Isolate receivers on which we have detections
Expand All @@ -133,11 +105,11 @@ all(rec_dets %in% rec_all)
rec_dets[!rec_dets %in% rec_all] # 546698 has detections but no deployment info

range(dd$DateTimeUTC[dd$Receiver == 546698])
nrow(dd[dd$Receiver == 546698, ]) # 20k detections between Jan 2018 & July 2018
nrow(dd[dd$Receiver == 546698, ]) # 21k detections between Jan 2018 & July 2018
length(unique(dd$TagID[dd$Receiver == 546698])) # 55 of our fish; not insignificant

# is it in the old records?
bd = readRDS("data/BARD_deployments_all_2022-06-24.rds")
bd = readRDS("data/BARD_deployments_all_2022-06-24.rds") # query prior to the Sept 2022 one
ans = bd[bd$Receiver_ser_num == 546698, ] # decker island from Jan 17-July 23; exact missing period

# add this deployment info in:
Expand All @@ -160,7 +132,7 @@ rec_all = unique(c(unique(path$Receiver), yolo_sjr))
stopifnot(all(rec_dets %in% rec_all)) # should pass now

# YOLO
ylocs = readxl::read_excel("data/YoloLatLongs.xlsx")
ylocs = readxl::read_excel(file.path(data.dir, "Yolo/YoloLatLongs.xlsx"))
colnames(ylocs) = c("Location_name", "Location long", "Latitude", "Longitude")
ylocs$Origin = "YOLO 2020"

Expand All @@ -170,10 +142,9 @@ ydep = merge(ydep, ylocs[ , c("Location_name", "Longitude", "Latitude")], all.x
ydep$End[is.na(ydep$End)] <- as.POSIXct("2019-08-21 11:05:00", tz = "Etc/GMT+8")

# LODI
lodi = readxl::read_excel("~/DropboxCFS/NEW PROJECTS - EXTERNAL SHARE/WST_Synthesis/Data/Lodi/LFWO_SJR_WST_Receiver_Deployment.xlsx")
lodi = readxl::read_excel(file.path(data.dir, "Lodi/LFWO_SJR_WST_Receiver_Deployment.xlsx"))

# need to take it down to 4 decimals; verify that there's only 1 lat/long per location_name

lodi = dplyr::rename(lodi, Location_name = Station)
lodi = as.data.frame(lodi[ , c("Location_name", "Longitude", "Latitude")])
stopifnot(all(sjr$Location_name %in% lodi$Location_name))
Expand Down Expand Up @@ -201,10 +172,9 @@ scs_ins$Origin = "BARD 2020"
scs_ins$Notes = "this row added in parse_deployments.R, Dec 2022"

alldeps = rbind(alldeps, scs_ins)

alldeps = alldeps[order(alldeps$Receiver, alldeps$Start), ]

# check
# manual checks
summary(unique(alldeps$Receiver))
summary(alldeps$Latitude)
summary(alldeps$Longitude)
Expand All @@ -216,22 +186,48 @@ range(alldeps$Start)
alldeps$Longitude[alldeps$Longitude > 0] <- alldeps$Longitude[alldeps$Longitude > 0]*(-1)
stopifnot(alldeps$Longitude < 0)

# add column of PST
# add column of UTC
alldeps$StartUTC = with_tz(alldeps$Start, tzone = "UTC")
alldeps$EndUTC = with_tz(alldeps$End, tzone = "UTC")

# only have deployments that end >= June 2010:
outside = alldeps$End < as.POSIXct("2010-05-30 23:23:23", tz = "Etc/GMT+8")
chk = alldeps[outside, ]
alldeps = alldeps[!outside, ]
saveRDS(alldeps, "data_clean/alldeps.rds")
if(any(outside)) alldeps = alldeps[!outside, ]

## Add basin to deployment table
## Spatially joins each deployment location to a basin polygon from Basins.kml,
## then merges the resulting Basin label back onto the full deployments table.
library(sf)
library(dplyr)
map = read_sf(file.path(data.dir, "spatial/Basins.kml"))

# Composite key: one string per unique lat/lon pair, used for de-duplication
# and as a merge key below. NOTE(review): paste0 with no separator means
# e.g. (12.3, 4.5) and (12.34, .5) could collide in principle — confirm
# coordinates are full-precision so this cannot happen.
alldeps$combo = paste0(alldeps$Latitude, alldeps$Longitude)

# Reduce to one row per Location_name (the earliest-starting unique coordinate
# combo per location) so the spatial intersection runs on points, not on every
# deployment record.
alldeps %>%
group_by(Location_name) %>%
arrange(Start) %>%
filter(!duplicated(combo)) %>%
select(-StartUTC, -EndUTC) %>%
ungroup() %>%
arrange(Origin, Location_name, Start) -> cgis

cgis = as.data.frame(cgis)
stopifnot(!any(table(cgis$Location_name)>1)) # make sure each location is associated with a single lat/lon combo

# Manual/export branch; never runs (kept for interactive use only).
if(FALSE){
deps = readRDS("data_clean/alldeps.rds")
# bounds
# allgis = subset(deps, Longitude > -122.65 & Longitude < -120.0 & Latitude > 37.1 & Latitude < 44) # cuts out the Pt_Reyes receivers, but we don't need to
write.csv(deps, "data_clean/alldeps.csv")

}
# Convert the per-location points to an sf object in the same CRS as the basin map.
pnts_sf <- st_as_sf(cgis, coords = c('Longitude', 'Latitude'), crs = st_crs(map))

# For each point, find the index of the basin polygon it falls in; points that
# intersect no polygon get NA and are labeled 'Bay' as the catch-all basin.
pnts <- pnts_sf %>% mutate(
intersection = as.integer(st_intersects(geometry, map)),
Basin = if_else(is.na(intersection), 'Bay', map$Name[intersection])
)

pnts = as.data.frame(pnts)
v = table(pnts$Location_name)
stopifnot(!any(v>1)) # still only one location name per combo

# add basin to deployments

# Left-join so every deployment row is retained even if its location somehow
# failed to get a Basin label.
alldeps = merge(alldeps, pnts[ , c("Location_name", "combo", "Basin")],
all.x = TRUE, by = c("Location_name", "combo"))

alldeps$combo = NULL # remove merging column; don't need in the final table

saveRDS(alldeps, "data_clean/alldeps.rds")
60 changes: 0 additions & 60 deletions R/combine_bard_yolo.R

This file was deleted.

89 changes: 89 additions & 0 deletions R/combine_detections.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# This script preps each raw detection table (BARD queried June 2022, LODI transferred May 2022, Yolo queried June 2022) to match the format of the Yolo Detections from 2012-2018 (https://github.com/fishsciences/ybt), and then joins them for QA/QCing.
# Output: data/WST_detections.rds — one row per unique (DateTimeUTC, Receiver, TagID)
# detection of a study fish, with origin and StudyID columns attached.
# M. Johnston
library(lubridate)
library(data.table)

# Path to the shared data directory is stored locally, not hard-coded.
data.dir = readRDS("data/data_dir_local.rds")
bard_loc = file.path(data.dir, "/Davis/allBARDdets_2022-06-03.rds") # full BARD detections table
bard = readRDS(bard_loc)
ydets = readRDS(file.path(data.dir, "Yolo/yolo_detections.rds"))
names(bard); names(ydets) # interactive sanity check of column names

# Check tz; Format bard same as detection table in Yolo detections.
# DateTimeUTC is held as character during the rbind/dedup steps and converted
# back to POSIXct at the end.
tz(bard$Detect_date_time)
bard$DateTimeUTC = as.character(bard$Detect_date_time)
# Rebuild Yolo-style identifiers: "VR2W-<serial>" receivers and
# "<codespace>-<tagid>" tag codes.
bard$Receiver = paste("VR2W", as.character(bard$Receiver_ser_num), sep = "-")
bard$TagID = paste(bard$Codespace, bard$Tag_ID, sep = "-")
bard[,c("TagName", "TagSN")] = NA # columns Yolo has but BARD lacks
bard$SensorValue = bard$Data
bard$SensorUnit = bard$Units
bard$DetOrigin = "BARD"

ydets$DetOrigin = "YOLO"
tz(ydets$DateTimeUTC)
cols = colnames(ydets) # Yolo's column set is the canonical layout for all sources
stopifnot(all(cols %in% colnames(bard)))

# Combine and find dups
tmp = as.data.table(rbind(ydets[,cols], bard[,cols]))
# Need to exclude NA cols, because not marked as dups
i = duplicated(tmp[,c("DateTimeUTC", "Receiver", "TagID")]) # data.table method much faster
table(i)

# duplicated rows should only be in BARD, not Yolo dets.
# Because ydets come first in the rbind, duplicated() flags the BARD copy.
in_bard = i[(nrow(ydets)+1):length(i)]
stopifnot(length(in_bard) == nrow(bard))
stopifnot(sum(in_bard) == sum(i)) # yolo dets should not have duplicates, as they were already removed in the ybt project

tmp = as.data.frame(tmp)
bard_tmp = tmp[ !i, cols] # Yolo + BARD with cross-source duplicates dropped
stopifnot(nrow(tmp) - nrow(bard_tmp) == sum(i)) # should have only removed the dups

# SJR Detections: read and stack all receiver-download csvs (recursively).
data_dir = file.path(data.dir, "Lodi/Lodi_DC/")
files = list.files(path = data_dir, pattern = ".csv", full.names = TRUE, recursive = TRUE)
dd = do.call(rbind, lapply(files, read.csv))

# Clean up column names: map VUE-export headers onto the canonical Yolo layout.
dd$DetOrigin = "SJR"
dd = dplyr::select(dd,
DateTimeUTC = Date.and.Time..UTC.,
Receiver,
TagID = Transmitter,
TagName = Transmitter.Name,
TagSN = Transmitter.Serial,
SensorValue = Sensor.Value,
SensorUnit = Sensor.Unit,
DetOrigin
)
# Check — NOTE(review): DateTimeUTC is still character here from read.csv, so
# tz()/range() operate on strings; confirm the csv timestamps are truly UTC.
tz(dd$DateTimeUTC)
range(dd$DateTimeUTC)
dd$DateTimeUTC = as.character(dd$DateTimeUTC)
# Combine and find dups (within the SJR csvs, which overlap across downloads)
ltmp = as.data.table(dd[ , cols])
# Need to exclude NA cols, because not marked as dups
i = duplicated(ltmp[ , c("DateTimeUTC", "Receiver", "TagID")]) #
sum(i) # 6132 rows

sjr = as.data.frame(ltmp)
sjr = sjr[!i, ]
stopifnot(nrow(ltmp) - nrow(sjr) == sum(i))

# Stack all three sources and restore POSIXct timestamps; drop anything before
# the study start date.
all_dets = rbind(bard_tmp, sjr)
all_dets$DateTimeUTC = as.POSIXct(all_dets$DateTimeUTC, tz = "UTC")
all_dets = all_dets[all_dets$DateTimeUTC > ymd_hms("2010-08-17 00:00:00", tz = "UTC"), ] # study

# subset detections down to just our study fish
tags = readRDS("data_clean/alltags.rds")

dd = subset(all_dets, TagID %in% tags$TagCode)
# Split "VR2W-12345" into frequency prefix + integer serial; keep the serial.
dd = tidyr::separate(dd, col = Receiver, sep = "-", into = c("Freq", "Receiver"))
dd$Receiver = as.integer(dd$Receiver)
dd = dd[ , c("TagID", "DateTimeUTC", "Receiver", "DetOrigin")]

dd = merge(dd, tags[ , c("TagCode", "StudyID")], by.x = "TagID", by.y = "TagCode") # slow; better to summarise/table by TagCode first, and then join that smaller table

# Add a PST (fixed UTC-8, no DST) timestamp column for downstream use.
dd$DateTimePST = with_tz(dd$DateTimeUTC, tz = "Etc/GMT+8")

saveRDS(dd, "data/WST_detections.rds")
Loading