Skip to content

Latest commit

 

History

History
263 lines (217 loc) · 9.01 KB

S2.md

File metadata and controls

263 lines (217 loc) · 9.01 KB

Introduction

This code calculates indicator S2 of NorInAliS.

Preparations

Load data from the Alien Species List 2018:

fab  <- read.csv2(url("https://datadryad.org/stash/downloads/file_stream/359484"),
                  as.is=T)
path <- read.csv2(url("https://datadryad.org/stash/downloads/file_stream/1823673"),
                  as.is=T)

Translate NAs into “unknown”:

for (i in 5:7) {
  path[which(is.na(path[, i])), i] <- "unknown"
}

Restrict data to alien species that are reproducing unaidedly in mainland Norway:

fab <- fab[which(fab$Status == "reproducing" & fab$Mainl),]

Restrict data to current pathways of introduction:

path <- path[path$Introd & path$Time == "current" & path$Name %in% fab$Name, ]

Define an auxiliary function:

# Combines text strings
"%+%" <- function(string1, string2) paste0(string1, string2)

Functions

Define indicator-specific functions:

asFreq  <- function(x) {
  # translates a frequency interval into a numerical value
  # Note that input is in events per decade,
  #  whereas output is in events per year!
  sapply(x, function(z) switch(z,
    "<1"=0.07, "1-8"=0.5, "9-19"=1.4, ">19"=4.6, NA))
}


asAbund <- function(x) {
  # translates an abundance interval into a numerical value
  sapply(x, function(z) switch(z,
    "1"=1, "2-10"=5, "11-100"=50, "101-1000"=500, ">1000"=5000, NA))
}


rincr <- function(n, min, max, r=TRUE) {
  # generates random numbers according to an increasing
  # triangular probability distribution
  min + sqrt(    if(r) runif(n) else 0.5) * (max - min)
}


rdecr <- function(n, min, max, r=TRUE) {
  # generates random numbers according to a decreasing
  # triangular probability distribution
  max - sqrt(1 - if(r) runif(n) else 0.5) * (max - min)
}


assignFrequencies <- function(n, pathways, pw, r=TRUE) {
  # assigns unknown frequencies according to the distribution of known frequencies,
  # separately for main pathway categories
  freq <- table(
    pathways$Freq[which(pathways$Freq != "unknown" & pathways$Cat == pw)]
  )[c("<1", "1-8", "9-19", ">19")]
  if (any(is.na(freq))) {
    freq[which(is.na(freq))] <- 0
  }
  freq <- freq / sum(freq)
  for (i in 4:2) {
    freq[i] <- sum(freq[1:i])
  }
  seq <- if (r) runif(n) else (1:n - 0.5) / n
  return(c("<1", "1-8", "9-19", ">19")[sapply(seq, function(x) which(freq > x)[1])])
}


S2a <- function(pathways) {
  # estimates S2(a) according to Equation ¤1
  return(length(unique(
    (pathways$Name %+% pathways$Subcat)[pathways$Introd & pathways$Time == "current"]
  )))
}


S2b <- function(pathways, nsim=0, CL=c(0.025, 0.25, 0.5, 0.75, 0.975)) {
  # estimates S2(b) according to Equation ¤2
  pathways <- pathways[which(pathways$Introd & pathways$Time == "current"),]
  results <- matrix(0, nrow(pathways), max(1, nsim))
  freq <- matrix(pathways$Freq, nrow(pathways), max(1, nsim))
  for (pw in unique(pathways$Cat)) {
    w <- which(pathways$Freq == "unknown" & pathways$Cat == pw)
    if (length(w)) {
      freq[w,] <- assignFrequencies(length(w) * max(1, nsim),
                                    pathways, pw, r = nsim >= 1)
    }
  }
  W <- freq == "<1"
  results[W] <-                rincr(sum(W), 0.01,  0.1, r = nsim >= 1)
  W <- freq == "1-8"
  results[W] <- if (nsim >= 1) runif(sum(W), 0.10,  0.9) else 0.5
  W <- freq == "9-19"
  results[W] <- if (nsim >= 1) runif(sum(W), 0.90,  1.9) else 1.4
  W <- freq == ">19"
  results[W] <-                rdecr(sum(W), 1.90, 10.0, r = nsim >= 1)
  if (nsim >=1 ) {
    freq <- c(mean(apply(results, 2, sum)),
              sd(apply(results, 2, sum)),
              quantile(apply(results, 2, sum), CL))
    names(freq) <- c("Average", "St.Dev.", (CL * 100) %+% "%CL")
    return(freq)
  } else {
    return(sum(asFreq(freq)))
  }
}


S2c <- function(pathways) {
  # estimates S2(c) according to Equation ¤3
  # (Note: this is a minimum implementation of indicator definition S2c!
  #  It illustrates the way S2c would be estimated, but it currently ignores
  #  uncertainty and simply assumes that the (utterly few) abundances reported
  #  are in fact representative of the unknown abundances, which is unlikely.)
  freq <- asFreq(pathways$Freq[which(pathways$Introd & pathways$Time == "current")])
  w <- which(is.na(freq))
  if (length(w)) {
    mod <- lm(asFreq(Freq) ~ Cat, data=pathways, subset=which(Freq != "unknown"))
    freq[w] <- predict(mod, data.frame(Cat = pathways$Cat[w]))
  }
  abund <- asAbund(pathways$Abund[pathways$Introd & pathways$Time == "current"])
  w <- which(is.na(abund))
  if (length(w)) {
    abund[w] <- mean(abund[-w])
  }
  return(sum(freq * abund))
}

Calculations

Summarise available data on frequency and abundance:

{
  N <- length(which(path$Introd & path$Time == "current"))
  cat("Distribution of frequencies of introduction events (per decade):\n")
  print(round(table(path$Freq )[c(1, 3, 4, 2, 5   )] / N, 4))
  cat("\n\nDistribution of abundances (individuals) per introduction event:\n")
  print(round(table(path$Abund)[c(2, 5, 4, 3, 1, 6)] / N, 4))
}

## Distribution of frequencies of introduction events (per decade):
## 
##      <1     1-8    9-19     >19 unknown 
##  0.2181  0.0437  0.4163  0.1959  0.1259 
## 
## 
## Distribution of abundances (individuals) per introduction event:
## 
##        1     2-10   11-100 101-1000    >1000  unknown 
##   0.0012   0.0070   0.0052   0.0017   0.0111   0.9738

Show how frequencies of introduction vary between main pathway categories:

summary(lm(asFreq(Freq) ~ Cat, data=path, subset=which(Freq != "unknown")))

## 
## Call:
## lm(formula = asFreq(Freq) ~ Cat, data = path, subset = which(Freq != 
##     "unknown"))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1277 -1.5973 -0.2948 -0.2673  2.9327 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.66727    0.09511  17.531  < 2e-16 ***
## Catescape    0.02750    0.10704   0.257    0.797    
## Catrelease   0.50273    0.34346   1.464    0.143    
## Catstowaway  0.21199    0.21923   0.967    0.334    
## Catunaided   1.53045    0.28937   5.289 1.41e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.617 on 1494 degrees of freedom
## Multiple R-squared:  0.02102,    Adjusted R-squared:  0.0184 
## F-statistic:  8.02 on 4 and 1494 DF,  p-value: 2.141e-06

Estimate the three indicator definitions for the current Alien Species List:

S2b_results <- S2b(path, nsim=100000)

{
  cat("\nIndicator value for S2(a): " %+%             S2a(path),    "\n")
  cat("\nIndicator value for S2(b):\n");  print(round(S2b_results))
  cat("\nIndicator value for S2(c): " %+%       round(S2c(path)), "\n\n")
}

## 
## Indicator value for S2(a): 1714 
## 
## Indicator value for S2(b):
## Average St.Dev.  2.5%CL   25%CL   50%CL   75%CL 97.5%CL 
##    3042      46    2951    3010    3042    3073    3133 
## 
## Indicator value for S2(c): 6626096

Disaggregate indicator S2(b) for main pathway categories:

S2b_disaggr <- list()
for (pw in c("release", "escape", "contaminant", "stowaway", "unaided")) {
  cat("\n\n" %+% pw %+% ":\n")
  S2b_disaggr[[pw]] <- S2b(path[which(path$Cat == pw),], nsim=100000)
  print(S2b_disaggr[[pw]])
}

## 
## 
## release:
## Average St.Dev.  2.5%CL   25%CL   50%CL   75%CL 97.5%CL 
##      59       7      47      54      58      63      72 
## 
## 
## escape:
## Average St.Dev.  2.5%CL   25%CL   50%CL   75%CL 97.5%CL 
##    1905      32    1844    1883    1905    1926    1968 
## 
## 
## contaminant:
## Average St.Dev.  2.5%CL   25%CL   50%CL   75%CL 97.5%CL 
##     624      23     579     608     623     639     670 
## 
## 
## stowaway:
## Average St.Dev.  2.5%CL   25%CL   50%CL   75%CL 97.5%CL 
##     218      15     189     208     217     228     249 
## 
## 
## unaided:
## Average St.Dev.  2.5%CL   25%CL   50%CL   75%CL 97.5%CL 
##     237      18     202     224     236     249     272