Initial set of files

ltrr-arizona-edu · Oct 27, 2024 · dfec680 · dfec680
1 parent b5359a0
commit dfec680
Show file tree

Hide file tree

Showing 68 changed files with 9,995 additions and 0 deletions.
diff --git a/AbbreviationsTRISH.odt b/AbbreviationsTRISH.odt
diff --git a/AbbreviationsTRISH.pdf b/AbbreviationsTRISH.pdf
diff --git a/CrossValid1.R b/CrossValid1.R
@@ -0,0 +1,126 @@
+CrossValid1<- function(X, y, nNeg, nPos, i2) {
+  # Leave-m-out cross-validation of a previously estimated stepwise regression model. 
+  # D Meko
+  # last revised 2024-03-08
+  #
+  # IN
+  # X is matrix of potential predictors
+  # y is vector of predictand
+  # nNeg and nPos are the maximum negative and positive lags that were allowed in
+  #   the stepwise regression model of predictand on chronologies. These settings
+  #   are used by function LeaveOut to compute m=1+4*max(nNeg,nPos), the nunber
+  #   of observations to be omitted in each iteration of leave-m-out cross-
+  #   validation
+  # i2 is order that columns of matrix of potential predictors entered model
+  #   in calling script or function, which is assumed to have applied stepwise
+  #   regression; that order is repeated here in the stepwise cross-validation
+  # 
+  # OUT
+  # CV [named list] statistics for maximim-RE model from forward stepwise
+  #   Names self-describing. Includes step of maximum RE; columns of the original
+  #   matrix of potential predictors in the maximum-RE model; reduction of error
+  #   statistic, validation root-mean-square errot, cross-validation predictions,
+  #   cross-validation residuals of the final model; and number of observations
+  #   left out in leave-m-out cross-validation. Also in CV is the numerical
+  #   vector REcvAll of RE at each step of the modeling, and corresponding vector
+  #   RMSEVall or RMSE at each step
+  #
+  # NOTES
+  #
+  # Absolutely important that input column pointer i1 indicates columns of X 
+  # in order (left to right) as they entered stepwise in the regression assumed
+  # to have been done before calling this function.
+  #
+  # revised 2024-03-08: cosmetic; expansion and clarification of comments
+
+  library("pracma") # needed for emulation of Matlab "backslash" operator through
+  # QR decomposition
+
+  source(paste(code_dir,"LeaveOut.R",sep="")) # form pointer matrix for leave-m-out cross-validation
+
+
+  #--- Build pointer matrix for predictor sets
+
+  X<-as.matrix(X)
+  mX <-dim(X)[1]
+  y <- as.matrix(y)
+  H<-LeaveOut(nNeg,nPos,mX)
+  Lin <- H$Lin
+  mOut <- H$NumberLeftOut
+
+
+  #### Cross-validation modeling
+
+  #... Storage for models for all steps
+  E1 <- matrix(NA,mX,length(i2)) # to store cross-valid residuals for all models
+  P1 <- E1 # to hold cross-validation predictions ...
+  RMSEvAll <- rep(NA,length(i2)) # to store RMSEv for all models
+  REall <- RMSEvAll  # ... RE ...
+
+  for (k in 1:length(i2)){
+
+    #--- Storage for various 1-col matrices specific to k-step model
+    w1 <- matrix(NA,mX,1) # cv predictions
+    w2 <-  w1 # null predictions (equal to calibration means)
+    ithis <- i2[1:k]
+
+    #--- long-term predictor  matrix
+    Xthis <- as.matrix(X[,ithis])
+    a1this <- matrix(1,mX,1)
+    Xthis <- cbind(a1this,Xthis)
+
+
+    for (n in 1:mX){
+
+      #--- Build predictor matrix
+      Lthis <- Lin[,n]
+      Lthis <- as.logical(as.matrix(Lthis))
+      nthis <- sum(Lthis)
+      u <- as.matrix(X[Lthis,ithis])
+      a1 <- matrix(1,nthis,1)
+      U <- cbind(a1,u) # predictor matrix
+
+      # Build predictand as 1-col matrix
+      v <- as.matrix(y[Lthis])
+
+      #--- Matrix left division to estimate regression parameters
+      b <- mldivide(U,v) # [matrix, 1 col, with coefficients, constant term first]
+
+      #--- Estimated predictand for central "left-out" observation
+      vhat1 <- Xthis[n,] %*% b
+      w1[n] <- vhat1;
+      w2[n] <- mean(v)
+
+    }
+
+    #--- Residuals time series
+    e1<-y-w1 # cross-validation 
+    e2 <- y-w2 # null-model  (using calib means as predictions)
+
+    #--- Validaton statistics
+    SSE1  <- sum(e1*e1) # sum of squares of cross-validation errors
+    MSE1 <- SSE1/mX  # mean square error of cross-validation
+    RMSEv <- sqrt(MSE1) # root-mean-square error of cross-validation
+    SSE2 <- sum(e2*e2) # sum of square of null-model residuals
+    RE <- 1 - SSE1/SSE2 # reduction of error statistic
+
+    #--- Store statistics for model step
+    E1[,k] <- e1
+    P1[,k] <- w1
+    RMSEvAll[k]<- RMSEv
+    REall[k] <-RE
+  }
+  #--- Find maximum-RE model and its statistics
+  kmax <- which.max(REall)  # at this step
+  i2cv <- i2[1:kmax]
+  REwinner <- REall[kmax]
+  Pcv <- as.matrix(P1[,kmax]) # cv predictions
+  Ecv <- as.matrix(E1[,kmax]) # cv errors
+  RMSEcv <- RMSEvAll[kmax]
+
+
+  CV <- list("REmaxStep"=kmax,"ColumnsIn"=i2cv,"REcvAll"=REall,"REcv"=REwinner,
+             "CVpredictions"=Pcv,"CVresiduals"=Ecv,"RMSEvall"=RMSEvAll,"RMSEcv"=RMSEcv,
+             "LeftOut"=mOut)
+  return(CV)
+}
diff --git a/CrossValid2.R b/CrossValid2.R
@@ -0,0 +1,85 @@
+CrossValid2<- function(X, y, nNeg,nPos) {
+  # Leave-m-out cross-validation of a regression model. 
+  # D Meko
+  # last revised 20220104
+  #
+  # IN
+  # X is matrix of predictors
+  # y is vector of predictand
+  # nNeg and nPos are the maximum negative and positive lags that were considered when
+  #   the model was fit. 
+  #
+  # OUT
+  # Output [named list] cross-validation statistics: 
+  #   REcv (1x1)r cross-validation reduction of error
+  #   CVpredictions [m 1 col]r cross-validation predictions
+  #   CVresidual [m, 1 col]r cross-validation residuals
+  #   RMSEcv (1x1)r root-mean-square error of cross-validation
+  #   LeftOut (1x1)i how many obs left out in each cross-validation model
+  #
+  # This is simplified from CrossValid1(), which handles various steps of a model
+  # previously fit by forward stepwise regression.
+
+  library("pracma") # needed for emulation of Matlab "backslash" operator through
+  # QR decomposition
+
+  source(paste(code_dir,"LeaveOut.R",sep="")) # form pointer matrix for leave-m-out cross-validation
+
+  #--- Build pointer matrix for cross-validation predictor sets
+
+  X<-as.matrix(X)
+  mX <-dim(X)[1]
+  y <- as.matrix(y)
+  H<-LeaveOut(nNeg,nPos,mX)
+  Lin <- H$Lin # logical pointer matrix; each col marks obs to use as 1 
+  mOut <- H$NumberLeftOut
+
+
+  #### Cross-validation modeling
+
+  #--- Storage 
+  w1 <- matrix(NA,mX,1) # to hold cv predictions
+  w2 <-  w1 # to hold null predictions (equal to calibration means)
+
+  #--- long-term predictor matrix, with 1's in first col
+  a1this <- matrix(1,mX,1)
+  Xthis <- cbind(a1this,X)
+
+
+  for (n in 1:mX){ # Loop over observations
+
+    #--- Build predictor matrix
+    Lthis <- Lin[,n]
+    Lthis <- as.logical(as.matrix(Lthis))
+    nthis <- sum(Lthis)
+    u <- as.matrix(X[Lthis,])
+    a1 <- matrix(1,nthis,1)
+    U <- cbind(a1,u) # predictor matrix, this cv model
+
+    # Build predictand as 1-col matrix
+    v <- as.matrix(y[Lthis])
+
+    #--- Matrix left division to estimate regression parameters
+    b <- mldivide(U,v) # [matrix, 1 col, with coefficients, constant term first]
+
+    #--- Estimated predictand for central "left-out" observation
+    vhat1 <- Xthis[n,] %*% b
+    w1[n] <- vhat1;
+    w2[n] <- mean(v)
+  }
+
+  #--- Time series of residuals
+  e1<-y-w1 # cross-validation 
+  e2 <- y-w2 # null-model  (using calib means as predictions)
+
+  #--- Validaton statistics
+  SSE1  <- sum(e1*e1) # sum of squares of cross-validation errors
+  MSE1 <- SSE1/mX  # mean square error of cross-validation
+  RMSEcv <- sqrt(MSE1) # root-mean-square error of cross-validation
+  SSE2 <- sum(e2*e2) # sum of square of null-model residuals
+  REcv <- 1 - SSE1/SSE2 # reduction of error statistic
+
+  Output <- list("REcv"=REcv,"CVpredictions"=w1,"CVresiduals"=e1,"RMSEcv"=RMSEcv,
+             "LeftOut"=mOut)
+  return(Output)
+}
diff --git a/EffectSS.R b/EffectSS.R
@@ -0,0 +1,113 @@
+EffectSS <- function(x,y) {
+  # D. Meko 
+  # Last revised 2022-09-09
+  # Effective sample size -- effective number of "independent" observations.
+  #
+  # Effective sample size, Nprime, is computed from the lag-1 autocorrelation of a time series
+  # or pair of series. In univariate mode, Nprime can be applied for adjustment of significance
+  # of univariate statistics (e.g., uncertainty of the sample mean or variance). In bivariate
+  # mode, the effecitive sample size can be applied to adjust signficance of the correlation
+  # coefficient for the two series.
+  #
+  #---IN
+  #
+  # x: time series matrix or vector without any NA; number of observations mx, number of series nx
+  # y: ditto; but if passing vector and matrix, make sure y is the vector and x the matrix
+  #
+  #---OUT
+  #
+  # Output: list with fields
+  #   Nprime: scalar or vector of effective sample size 
+  #   Lflag: flag (logical, length 2)
+  #     (1) x or y have at least one NA 
+  #     (2) y is not vector and not same col-size as x
+  #   ErrorMessage [vector]c : error message associated with Lflag
+  #
+  #---NOTES
+  #
+  # If input argument y is NA, Nprime is computed in univariate mode:
+  #   If x is vector, Nprime is the scalar effective sample size
+  #   If x is a matrix, Nprime is the vector of the effective sample sizes of the individual series
+  # If input argument y is not NA, Nprime is computed in bivariate mode:
+  #   If y is a vector, Nprime is the effective sample size for correlation of y with all
+  #     series in x (Nprime can be scalar or vector, depending on whether x is scalar or vector)
+  #   If y is a matrix with ny>1 columns, ny must equal nx, and Nprime is the effective sample size
+  #     for correlation of each column of x with same column of y (Nprime is a vector)
+  # Method. In univariate mode, if original sample size is N, effective sample size is
+  #   Nprime = N(1-r1)/(1+r1), where r1 is the lag-1 autocorrelation
+  # Method. In bivariate mode, for pair of series, x and y, effective sample size is
+  #   Nprime = N(1-r1r2)/(1+r1r2), where r1 is lag-1 autocorrelation of x and r2 is lag-1
+  #   autocorrelation of y
+
+  source(paste(code_dir,"LagkAcc.R",sep="")) # optional transformation of flows
+  Lflag <-c(FALSE,FALSE) # initialize as no error flags
+  ErrorMessage <- "No problems"
+  Nprime <- NA
+
+  klag <-1 # will only need lag-1 autocorrelation
+
+  if (!all(complete.cases(x)) | (!all(is.na(y)) & !all(complete.cases(y)))){
+    # ERROR MESSAGE
+    Lflag[1]<-TRUE
+    ErrorMessage <- 'x or y  contain a NA'
+    Output <- list(Lflag=Lflag,ErrorMessage=ErrorMessage,Nprime=Nprime)
+    return(Output)
+  }
+  if (is.vector(x)){
+    N <- length(x)
+    nx <-1
+  } else {
+    N <- dim(x)[1]
+    nx <- dim(x)[2]
+  }
+  if (all(is.na(y))){
+    # Univariate mode for effective sample size
+    ResTemp <- LagkAcc(x,klag)
+    a <- 1-ResTemp$rk
+    b <- 1+ResTemp$rk
+    f <- a/b
+    Nprime <- floor(f*N)
+    L <- ResTemp$rk <= 0
+    Nprime[L] <- N # if lag-1 r of either series lag-1 autocorr non-positive, Nprime equals N
+  } else {
+    # bivariate mode (e.g., for significane adjustment for correlation)
+    if (is.vector(y)){
+      if (nx ==1){
+        # both x and y are vectors
+        ResTemp<- rkGet(x,y,klag)
+        r1x <- ResTemp$r1x; r1y <- ResTemp$r1y
+      } else {
+        # y vector, x matrix
+        y = matrix(replicate(nx,y),nrow=N) # replicate y to same col-size as x
+        ResTemp<- rkGet(x,y,klag)
+        r1x <- ResTemp$r1x; r1y <- ResTemp$r1y
+      }
+    } else {
+      # y and x both matrix
+      ny <- dim(y)[2]
+      if (ny != nx){
+        # ERROR MESSAGE
+        Lflag[2]<-TRUE
+        ErrorMessage <- 'y is matrix and not same col-size as x'
+        Output <- list(Lflag=Lflag,ErrorMessage=ErrorMessage,Nprime=Nprime)
+        return(Output)
+      }
+      ResTemp<- rkGet(x,y,klag)
+      r1x <- ResTemp$r1x; r1y <- ResTemp$r1y
+    }
+    rr <- r1x * r1y # products of lag-1 autocorrelation
+    f <- (1-rr)/(1+rr)
+    Nprime <- floor(f*N)
+    L <- r1x <=0 | r1y <=0
+    Nprime[L] <- N
+  }
+
+  Output <- list(Nprime=Nprime,Lflag=Lflag,ErrorMessage=ErrorMessage)
+}
+rkGet <- function(x,y,klag){
+  ResTempx <- LagkAcc(x,klag)
+  ResTempy <- LagkAcc(y,klag)
+  r1x <- ResTempx$rk
+  r1y <- ResTempy$rk
+  Out1 <- list(r1x=r1x,r1y=r1y)
+}