forked from eco4cast/neon4cast-phenology
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcombine_forecasts_phenology.R
79 lines (61 loc) · 2.99 KB
/
combine_forecasts_phenology.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
library(tidyverse)
library(tools)
base.dir = '/efi_neon_challenge/forecasts/'
fnames <- tibble(files = list.files(path = base.dir, recursive = TRUE, full.names = TRUE)) %>%
filter(file_ext(files) %in% c("nc","csv")) %>%
filter(!str_detect(files, "not_in_standard")) %>%
mutate(basename = basename(files))
d <- unlist(str_split_fixed(tools::file_path_sans_ext(fnames$basename), pattern = "-", 5))
data <- unlist(str_split_fixed(tools::file_path_sans_ext(fnames$basename), pattern = "-", 5)) %>%
as_tibble() %>%
unite("date", V2:V4, sep = "-") %>%
rename("theme" = V1,
"team" = V5) %>%
bind_cols(fnames) %>%
select(-basename)
data <- data %>% filter(theme == "phenology")
bad_submissions <- c("/efi_neon_challenge/forecasts//phenology/phenology-2021-03-01-PEG.csv")
combined <- NULL
#for(i in 1:100){
for(i in 1:nrow(data)){
print(i)
#if(!(data$files[i] %in% bad_submissions)){
d <- neon4cast:::read_forecast(data$files[i])
if("ensemble" %in% colnames(d)){
d <- d %>%
group_by(time, siteID, forecast_start_time, horizon, team, theme) %>%
summarise(mean = mean(gcc_90, na.rm = TRUE),
sd = sd(gcc_90, na.rm = TRUE),
upper95 = quantile(gcc_90, 0.975, na.rm = TRUE),
lower95 = quantile(gcc_90, 0.025, na.rm = TRUE)) %>%
pivot_longer(cols = c("mean","sd", "upper95","lower95"), names_to = "statistic", values_to = "gcc_90") %>%
select(time, siteID, forecast_start_time, horizon, team, theme, statistic, gcc_90)
}else{
d <- d %>%
select(time, siteID, forecast_start_time, horizon, team, theme, statistic, gcc_90) %>%
pivot_wider(names_from = statistic, values_from = gcc_90, values_fn = mean) %>%
mutate(upper95 = mean + 1.96 * sd,
lower95 = mean - 1.96 * sd) %>%
pivot_longer(cols = c("mean","sd", "upper95","lower95"), names_to = "statistic", values_to = "gcc_90")
}
combined <- bind_rows(combined, d)
#}
}
write_csv(combined, file = "~/Documents/scripts/neon4cast-phenology/combined_forecasts.csv")
aws.s3::put_object("~/Documents/scripts/neon4cast-phenology/combined_forecasts.csv", object = "not_in_standard/phenology_combined_forecasts.csv",bucket = "forecasts")
combined <- read_csv("~/Documents/scripts/neon4cast-phenology/combined_forecasts.csv")
write_csv(combined, file = "/efi_neon_challenge/forecasts/not_in_standard/combined_forecasts.csv")
obs <- read_csv("/efi_neon_challenge/targets/phenology/phenology-targets.csv.gz") %>%
select(time, siteID, gcc_90) %>%
rename(gcc_90obs = gcc_90)
combined %>%
filter(forecast_start_time == lubridate::as_date("2021-04-10"),
time <= forecast_start_time + lubridate::days(35)) %>%
pivot_wider(names_from = statistic, values_from = gcc_90) %>%
left_join(obs) %>%
ggplot(aes(x = time, color = team)) +
geom_line(aes(y = mean)) +
geom_ribbon(aes(x = time, ymin = lower95, ymax = upper95, fill = team), alpha = 0.4) +
geom_point(aes(y = gcc_90obs)) +
facet_wrap(vars(siteID)) +
theme_bw()