-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathGetBBSData.R
110 lines (94 loc) · 5.08 KB
/
GetBBSData.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
GetRouteData <- function(AOU=NULL, countrynum=NULL, states=NULL, year, weather=NULL, routes=NULL,
Zeroes=TRUE, TenStops = FALSE,
Dir="ftp://ftpext.usgs.gov/pub/er/md/laurel/BBS/DataFiles/") {
if(TenStops) {
DirData <- paste0(Dir, "States/")
CountString <- "^count"
} else {
if(any(year<1997)) stop("Data only available from 1997: pre-1997 data not integrated into this function for 50 stop data (yet)")
DirData <- paste0(Dir, "50-StopData/1997ToPresent_SurveyWide/")
CountString <- "^stop"
}
if(!is.null(countrynum) & any(!(countrynum%in%c(124, 484, 840)))) stop("countrynum should be either 124 (Canada), 484 (Mexico), or 840 (USA)")
GetDat <- function(file, dir, year, AOU, countrynum, states) {
dat <- GetUnzip(ZipName=paste0(dir, file), FileName=gsub("^Fifty", "fifty", gsub("zip", "csv", file)))
names(dat) <- tolower(names(dat))
if(is.null(year)) { UseYear <- TRUE } else { UseYear <- dat$year%in%year }
if(is.null(AOU)) { UseAOU <- TRUE } else { UseAOU <- dat$aou%in%AOU }
if(is.null(countrynum)) { UseCountry <- TRUE } else { UseCountry <- dat$countrynum%in%countrynum }
if(is.null(states)) { UseState <- TRUE } else { UseState <- dat$statenum%in%states }
Use <- UseYear & UseAOU & UseCountry & UseState
if(sum(Use)>0) {
dat$routeID <- paste(dat$statenum, dat[,grep("^[Rr]oute$", names(dat))])
dat <- subset(dat, subset=Use)
return(dat)
} else return(NULL)
}
# Only use the files we want
CountriesToUse <- if(!is.null(countrynum)) {
RegionsForZipFiles$countrynum%in%countrynum
} else {
TRUE
}
StatesToUse <- if(!is.null(states)) {
RegionsForZipFiles$RegionCode%in%states
} else {
TRUE
}
ToUse <- CountriesToUse & StatesToUse
if(TenStops) {
Files <- RegionsForZipFiles$FileName10stop[ToUse]
Missing <- ToUse & is.na(RegionsForZipFiles$FileName10stop)
} else { # 50 stop
Files <- RegionsForZipFiles$FileName50stop[ToUse]
Missing <- ToUse & is.na(RegionsForZipFiles$FileName50stop)
}
if(length(Files)==0) stop("No data for the states specified")
if(any(is.na(Files))) warning(paste0("No data for these states: ", paste(RegionsForZipFiles$'State/Prov/TerrName'[Missing], collapse=", ")))
Data.lst <- sapply(Files[!is.na(Files)], GetDat, dir=DirData, year=year, AOU=AOU, countrynum=countrynum, states=states, simplify=FALSE)
if(all(unlist(lapply(Data.lst, is.null)))) {
warning("no data, sorry")
AllData <- NULL
} else {
Data <- ldply(Data.lst)
# Get route data for all routes, and annual data
if(is.null(weather)) weather <-GetWeather(Dir)
if(is.null(year)) { UseYear <- TRUE } else { UseYear <- weather$Year%in%year }
if(is.null(countrynum)) { UseCountry <- TRUE } else { UseCountry <- weather$CountryNum%in%countrynum }
if(is.null(states)) { UseState <- TRUE } else { UseState <- weather$StateNum%in%states }
UseWeather <- UseYear & UseCountry & UseState
if(is.null(routes)) routes <- GetRoutes(Dir)
if(is.null(countrynum)) { UseCountry <- TRUE } else { UseCountry <- routes$CountryNum%in%countrynum }
if(is.null(states)) { UseState <- TRUE } else { UseState <- routes$StateNum%in%states }
UseRoutes <- UseCountry & UseState
CommonNames <- names(Data)[names(Data)%in%names(weather)]
CommonNames <- CommonNames[CommonNames%in%names(routes)]
# Subset data
# First, sites sampled in chosen year(s)
weather <-subset(weather, subset=UseWeather,
select=c(CommonNames, "Year", "Month", "Day", "RunType", "StateNum"))
# Route data for sites sampled in chosen years
routes <- subset(routes, subset=UseRoutes & routes$routeID%in%weather$routeID,
select=c(CommonNames, "Latitude", "Longitude", "StateNum"))
# merge data sets
dat.routeID.year <- paste(Data$routeID, Data$year, sep=".")
routes$routeID <- paste0(routes$StateNum, routes$routeID)
weather.routeID.year <- paste(paste0(weather$StateNum, weather$routeID), weather$Year, sep=".")
WeatherWhiches <- match(dat.routeID.year, weather.routeID.year)
RouteWhiches <- match(Data$routeID, routes$routeID)
AllData <- cbind(Data, weather[WeatherWhiches, !names(weather)%in%names(Data)],
routes[RouteWhiches, !names(routes)%in%names(Data)])
# if(!is.na(weather)) AllData <- merge(Data, weather, all=TRUE) # by=c("routeID", "RPID"),
# if(!is.na(routes)) AllData <- merge(AllData, routes, all=TRUE) # by="routeID",
AllData$SumCount <- apply(AllData[,grep(CountString, names(AllData))],1,sum, na.rm=TRUE)
if(!Zeroes) AllData <- subset(AllData, AllData$SumCount>0)
AllData <- AllData[,!names(AllData)%in%c(".id", "routedataid", "year")]
}
AllData
}
# GetRoutes function has an issue. It calls Routes.zip instead of routes.zip
GetRoutes <- function(Dir="ftp://ftpext.usgs.gov/pub/er/md/laurel/BBS/DataFiles/") {
routes=GetUnzip(ZipName=paste0(Dir, "routes.zip"), FileName="routes.csv")
routes$routeID=paste(routes$statenum, routes$Route)
routes
}