This code calculates indicator S2 of NorInAliS.
Load data from the Alien Species List 2018:
- The “fab” data are available from Sandvik et al. (2020).
- The “path” data are available from Sandvik et al. (2022).
fab <- read.csv2(url("https://datadryad.org/stash/downloads/file_stream/359484"),
as.is=T)
path <- read.csv2(url("https://datadryad.org/stash/downloads/file_stream/1823673"),
as.is=T)
Translate NA
s into “unknown”:
for (i in 5:7) {
path[which(is.na(path[, i])), i] <- "unknown"
}
Restrict data to alien species that are reproducing unaidedly in mainland Norway:
fab <- fab[which(fab$Status == "reproducing" & fab$Mainl),]
Restrict data to current pathways of introduction:
path <- path[path$Introd & path$Time == "current" & path$Name %in% fab$Name, ]
Define an auxiliary function:
# Combines text strings
"%+%" <- function(string1, string2) paste0(string1, string2)
Define indicator-specific functions:
asFreq <- function(x) {
# translates a frequency interval into a numerical value
# Note that input is in events per decade,
# whereas output is in events per year!
sapply(x, function(z) switch(z,
"<1"=0.07, "1-8"=0.5, "9-19"=1.4, ">19"=4.6, NA))
}
asAbund <- function(x) {
# translates an abundance interval into a numerical value
sapply(x, function(z) switch(z,
"1"=1, "2-10"=5, "11-100"=50, "101-1000"=500, ">1000"=5000, NA))
}
rincr <- function(n, min, max, r=TRUE) {
# generates random numbers according to an increasing
# triangular probability distribution
min + sqrt( if(r) runif(n) else 0.5) * (max - min)
}
rdecr <- function(n, min, max, r=TRUE) {
# generates random numbers according to a decreasing
# triangular probability distribution
max - sqrt(1 - if(r) runif(n) else 0.5) * (max - min)
}
assignFrequencies <- function(n, pathways, pw, r=TRUE) {
# assigns unknown frequencies according to the distribution of known frequencies,
# separately for main pathway categories
freq <- table(
pathways$Freq[which(pathways$Freq != "unknown" & pathways$Cat == pw)]
)[c("<1", "1-8", "9-19", ">19")]
if (any(is.na(freq))) {
freq[which(is.na(freq))] <- 0
}
freq <- freq / sum(freq)
for (i in 4:2) {
freq[i] <- sum(freq[1:i])
}
seq <- if (r) runif(n) else (1:n - 0.5) / n
return(c("<1", "1-8", "9-19", ">19")[sapply(seq, function(x) which(freq > x)[1])])
}
S2a <- function(pathways) {
# estimates S2(a) according to Equation ¤1
return(length(unique(
(pathways$Name %+% pathways$Subcat)[pathways$Introd & pathways$Time == "current"]
)))
}
S2b <- function(pathways, nsim=0, CL=c(0.025, 0.25, 0.5, 0.75, 0.975)) {
# estimates S2(b) according to Equation ¤2
pathways <- pathways[which(pathways$Introd & pathways$Time == "current"),]
results <- matrix(0, nrow(pathways), max(1, nsim))
freq <- matrix(pathways$Freq, nrow(pathways), max(1, nsim))
for (pw in unique(pathways$Cat)) {
w <- which(pathways$Freq == "unknown" & pathways$Cat == pw)
if (length(w)) {
freq[w,] <- assignFrequencies(length(w) * max(1, nsim),
pathways, pw, r = nsim >= 1)
}
}
W <- freq == "<1"
results[W] <- rincr(sum(W), 0.01, 0.1, r = nsim >= 1)
W <- freq == "1-8"
results[W] <- if (nsim >= 1) runif(sum(W), 0.10, 0.9) else 0.5
W <- freq == "9-19"
results[W] <- if (nsim >= 1) runif(sum(W), 0.90, 1.9) else 1.4
W <- freq == ">19"
results[W] <- rdecr(sum(W), 1.90, 10.0, r = nsim >= 1)
if (nsim >=1 ) {
freq <- c(mean(apply(results, 2, sum)),
sd(apply(results, 2, sum)),
quantile(apply(results, 2, sum), CL))
names(freq) <- c("Average", "St.Dev.", (CL * 100) %+% "%CL")
return(freq)
} else {
return(sum(asFreq(freq)))
}
}
S2c <- function(pathways) {
# estimates S2(c) according to Equation ¤3
# (Note: this is a minimum implementation of indicator definition S2c!
# It illustrates the way S2c would be estimated, but it currently ignores
# uncertainty and simply assumes that the (utterly few) abundances reported
# are in fact representative of the unknown abundances, which is unlikely.)
freq <- asFreq(pathways$Freq[which(pathways$Introd & pathways$Time == "current")])
w <- which(is.na(freq))
if (length(w)) {
mod <- lm(asFreq(Freq) ~ Cat, data=pathways, subset=which(Freq != "unknown"))
freq[w] <- predict(mod, data.frame(Cat = pathways$Cat[w]))
}
abund <- asAbund(pathways$Abund[pathways$Introd & pathways$Time == "current"])
w <- which(is.na(abund))
if (length(w)) {
abund[w] <- mean(abund[-w])
}
return(sum(freq * abund))
}
Summarise available data on frequency and abundance:
{
N <- length(which(path$Introd & path$Time == "current"))
cat("Distribution of frequencies of introduction events (per decade):\n")
print(round(table(path$Freq )[c(1, 3, 4, 2, 5 )] / N, 4))
cat("\n\nDistribution of abundances (individuals) per introduction event:\n")
print(round(table(path$Abund)[c(2, 5, 4, 3, 1, 6)] / N, 4))
}
## Distribution of frequencies of introduction events (per decade):
##
## <1 1-8 9-19 >19 unknown
## 0.2181 0.0437 0.4163 0.1959 0.1259
##
##
## Distribution of abundances (individuals) per introduction event:
##
## 1 2-10 11-100 101-1000 >1000 unknown
## 0.0012 0.0070 0.0052 0.0017 0.0111 0.9738
Show how frequencies of introduction vary between main pathway categories:
summary(lm(asFreq(Freq) ~ Cat, data=path, subset=which(Freq != "unknown")))
##
## Call:
## lm(formula = asFreq(Freq) ~ Cat, data = path, subset = which(Freq !=
## "unknown"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.1277 -1.5973 -0.2948 -0.2673 2.9327
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.66727 0.09511 17.531 < 2e-16 ***
## Catescape 0.02750 0.10704 0.257 0.797
## Catrelease 0.50273 0.34346 1.464 0.143
## Catstowaway 0.21199 0.21923 0.967 0.334
## Catunaided 1.53045 0.28937 5.289 1.41e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.617 on 1494 degrees of freedom
## Multiple R-squared: 0.02102, Adjusted R-squared: 0.0184
## F-statistic: 8.02 on 4 and 1494 DF, p-value: 2.141e-06
Estimate the three indicator definitions for the current Alien Species List:
S2b_results <- S2b(path, nsim=100000)
{
cat("\nIndicator value for S2(a): " %+% S2a(path), "\n")
cat("\nIndicator value for S2(b):\n"); print(round(S2b_results))
cat("\nIndicator value for S2(c): " %+% round(S2c(path)), "\n\n")
}
##
## Indicator value for S2(a): 1714
##
## Indicator value for S2(b):
## Average St.Dev. 2.5%CL 25%CL 50%CL 75%CL 97.5%CL
## 3042 46 2951 3010 3042 3073 3133
##
## Indicator value for S2(c): 6626096
Disaggregate indicator S2(b) for main pathway categories:
S2b_disaggr <- list()
for (pw in c("release", "escape", "contaminant", "stowaway", "unaided")) {
cat("\n\n" %+% pw %+% ":\n")
S2b_disaggr[[pw]] <- S2b(path[which(path$Cat == pw),], nsim=100000)
print(S2b_disaggr[[pw]])
}
##
##
## release:
## Average St.Dev. 2.5%CL 25%CL 50%CL 75%CL 97.5%CL
## 59 7 47 54 58 63 72
##
##
## escape:
## Average St.Dev. 2.5%CL 25%CL 50%CL 75%CL 97.5%CL
## 1905 32 1844 1883 1905 1926 1968
##
##
## contaminant:
## Average St.Dev. 2.5%CL 25%CL 50%CL 75%CL 97.5%CL
## 624 23 579 608 623 639 670
##
##
## stowaway:
## Average St.Dev. 2.5%CL 25%CL 50%CL 75%CL 97.5%CL
## 218 15 189 208 217 228 249
##
##
## unaided:
## Average St.Dev. 2.5%CL 25%CL 50%CL 75%CL 97.5%CL
## 237 18 202 224 236 249 272