Skip to content

Commit

Permalink
Initial set of files
Browse files Browse the repository at this point in the history
  • Loading branch information
Dave Meko committed Oct 27, 2024
1 parent b5359a0 commit dfec680
Show file tree
Hide file tree
Showing 68 changed files with 9,995 additions and 0 deletions.
Binary file added AbbreviationsTRISH.odt
Binary file not shown.
Binary file added AbbreviationsTRISH.pdf
Binary file not shown.
126 changes: 126 additions & 0 deletions CrossValid1.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
CrossValid1<- function(X, y, nNeg, nPos, i2) {
# Leave-m-out cross-validation of a previously estimated stepwise regression model.
# D Meko
# last revised 2024-03-08
#
# IN
# X is matrix of potential predictors
# y is vector of predictand
# nNeg and nPos are the maximum negative and positive lags that were allowed in
# the stepwise regression model of predictand on chronologies. These settings
# are used by function LeaveOut to compute m=1+4*max(nNeg,nPos), the nunber
# of observations to be omitted in each iteration of leave-m-out cross-
# validation
# i2 is order that columns of matrix of potential predictors entered model
# in calling script or function, which is assumed to have applied stepwise
# regression; that order is repeated here in the stepwise cross-validation
#
# OUT
# CV [named list] statistics for maximim-RE model from forward stepwise
# Names self-describing. Includes step of maximum RE; columns of the original
# matrix of potential predictors in the maximum-RE model; reduction of error
# statistic, validation root-mean-square errot, cross-validation predictions,
# cross-validation residuals of the final model; and number of observations
# left out in leave-m-out cross-validation. Also in CV is the numerical
# vector REcvAll of RE at each step of the modeling, and corresponding vector
# RMSEVall or RMSE at each step
#
# NOTES
#
# Absolutely important that input column pointer i1 indicates columns of X
# in order (left to right) as they entered stepwise in the regression assumed
# to have been done before calling this function.
#
# revised 2024-03-08: cosmetic; expansion and clarification of comments

library("pracma") # needed for emulation of Matlab "backslash" operator through
# QR decomposition

source(paste(code_dir,"LeaveOut.R",sep="")) # form pointer matrix for leave-m-out cross-validation


#--- Build pointer matrix for predictor sets

X<-as.matrix(X)
mX <-dim(X)[1]
y <- as.matrix(y)
H<-LeaveOut(nNeg,nPos,mX)
Lin <- H$Lin
mOut <- H$NumberLeftOut


#### Cross-validation modeling

#... Storage for models for all steps
E1 <- matrix(NA,mX,length(i2)) # to store cross-valid residuals for all models
P1 <- E1 # to hold cross-validation predictions ...
RMSEvAll <- rep(NA,length(i2)) # to store RMSEv for all models
REall <- RMSEvAll # ... RE ...

for (k in 1:length(i2)){

#--- Storage for various 1-col matrices specific to k-step model
w1 <- matrix(NA,mX,1) # cv predictions
w2 <- w1 # null predictions (equal to calibration means)
ithis <- i2[1:k]

#--- long-term predictor matrix
Xthis <- as.matrix(X[,ithis])
a1this <- matrix(1,mX,1)
Xthis <- cbind(a1this,Xthis)


for (n in 1:mX){

#--- Build predictor matrix
Lthis <- Lin[,n]
Lthis <- as.logical(as.matrix(Lthis))
nthis <- sum(Lthis)
u <- as.matrix(X[Lthis,ithis])
a1 <- matrix(1,nthis,1)
U <- cbind(a1,u) # predictor matrix

# Build predictand as 1-col matrix
v <- as.matrix(y[Lthis])

#--- Matrix left division to estimate regression parameters
b <- mldivide(U,v) # [matrix, 1 col, with coefficients, constant term first]

#--- Estimated predictand for central "left-out" observation
vhat1 <- Xthis[n,] %*% b
w1[n] <- vhat1;
w2[n] <- mean(v)

}

#--- Residuals time series
e1<-y-w1 # cross-validation
e2 <- y-w2 # null-model (using calib means as predictions)

#--- Validaton statistics
SSE1 <- sum(e1*e1) # sum of squares of cross-validation errors
MSE1 <- SSE1/mX # mean square error of cross-validation
RMSEv <- sqrt(MSE1) # root-mean-square error of cross-validation
SSE2 <- sum(e2*e2) # sum of square of null-model residuals
RE <- 1 - SSE1/SSE2 # reduction of error statistic

#--- Store statistics for model step
E1[,k] <- e1
P1[,k] <- w1
RMSEvAll[k]<- RMSEv
REall[k] <-RE
}
#--- Find maximum-RE model and its statistics
kmax <- which.max(REall) # at this step
i2cv <- i2[1:kmax]
REwinner <- REall[kmax]
Pcv <- as.matrix(P1[,kmax]) # cv predictions
Ecv <- as.matrix(E1[,kmax]) # cv errors
RMSEcv <- RMSEvAll[kmax]


CV <- list("REmaxStep"=kmax,"ColumnsIn"=i2cv,"REcvAll"=REall,"REcv"=REwinner,
"CVpredictions"=Pcv,"CVresiduals"=Ecv,"RMSEvall"=RMSEvAll,"RMSEcv"=RMSEcv,
"LeftOut"=mOut)
return(CV)
}
85 changes: 85 additions & 0 deletions CrossValid2.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
CrossValid2<- function(X, y, nNeg,nPos) {
# Leave-m-out cross-validation of a regression model.
# D Meko
# last revised 20220104
#
# IN
# X is matrix of predictors
# y is vector of predictand
# nNeg and nPos are the maximum negative and positive lags that were considered when
# the model was fit.
#
# OUT
# Output [named list] cross-validation statistics:
# REcv (1x1)r cross-validation reduction of error
# CVpredictions [m 1 col]r cross-validation predictions
# CVresidual [m, 1 col]r cross-validation residuals
# RMSEcv (1x1)r root-mean-square error of cross-validation
# LeftOut (1x1)i how many obs left out in each cross-validation model
#
# This is simplified from CrossValid1(), which handles various steps of a model
# previously fit by forward stepwise regression.

library("pracma") # needed for emulation of Matlab "backslash" operator through
# QR decomposition

source(paste(code_dir,"LeaveOut.R",sep="")) # form pointer matrix for leave-m-out cross-validation

#--- Build pointer matrix for cross-validation predictor sets

X<-as.matrix(X)
mX <-dim(X)[1]
y <- as.matrix(y)
H<-LeaveOut(nNeg,nPos,mX)
Lin <- H$Lin # logical pointer matrix; each col marks obs to use as 1
mOut <- H$NumberLeftOut


#### Cross-validation modeling

#--- Storage
w1 <- matrix(NA,mX,1) # to hold cv predictions
w2 <- w1 # to hold null predictions (equal to calibration means)

#--- long-term predictor matrix, with 1's in first col
a1this <- matrix(1,mX,1)
Xthis <- cbind(a1this,X)


for (n in 1:mX){ # Loop over observations

#--- Build predictor matrix
Lthis <- Lin[,n]
Lthis <- as.logical(as.matrix(Lthis))
nthis <- sum(Lthis)
u <- as.matrix(X[Lthis,])
a1 <- matrix(1,nthis,1)
U <- cbind(a1,u) # predictor matrix, this cv model

# Build predictand as 1-col matrix
v <- as.matrix(y[Lthis])

#--- Matrix left division to estimate regression parameters
b <- mldivide(U,v) # [matrix, 1 col, with coefficients, constant term first]

#--- Estimated predictand for central "left-out" observation
vhat1 <- Xthis[n,] %*% b
w1[n] <- vhat1;
w2[n] <- mean(v)
}

#--- Time series of residuals
e1<-y-w1 # cross-validation
e2 <- y-w2 # null-model (using calib means as predictions)

#--- Validaton statistics
SSE1 <- sum(e1*e1) # sum of squares of cross-validation errors
MSE1 <- SSE1/mX # mean square error of cross-validation
RMSEcv <- sqrt(MSE1) # root-mean-square error of cross-validation
SSE2 <- sum(e2*e2) # sum of square of null-model residuals
REcv <- 1 - SSE1/SSE2 # reduction of error statistic

Output <- list("REcv"=REcv,"CVpredictions"=w1,"CVresiduals"=e1,"RMSEcv"=RMSEcv,
"LeftOut"=mOut)
return(Output)
}
113 changes: 113 additions & 0 deletions EffectSS.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
EffectSS <- function(x,y) {
# D. Meko
# Last revised 2022-09-09
# Effective sample size -- effective number of "independent" observations.
#
# Effective sample size, Nprime, is computed from the lag-1 autocorrelation of a time series
# or pair of series. In univariate mode, Nprime can be applied for adjustment of significance
# of univariate statistics (e.g., uncertainty of the sample mean or variance). In bivariate
# mode, the effecitive sample size can be applied to adjust signficance of the correlation
# coefficient for the two series.
#
#---IN
#
# x: time series matrix or vector without any NA; number of observations mx, number of series nx
# y: ditto; but if passing vector and matrix, make sure y is the vector and x the matrix
#
#---OUT
#
# Output: list with fields
# Nprime: scalar or vector of effective sample size
# Lflag: flag (logical, length 2)
# (1) x or y have at least one NA
# (2) y is not vector and not same col-size as x
# ErrorMessage [vector]c : error message associated with Lflag
#
#---NOTES
#
# If input argument y is NA, Nprime is computed in univariate mode:
# If x is vector, Nprime is the scalar effective sample size
# If x is a matrix, Nprime is the vector of the effective sample sizes of the individual series
# If input argument y is not NA, Nprime is computed in bivariate mode:
# If y is a vector, Nprime is the effective sample size for correlation of y with all
# series in x (Nprime can be scalar or vector, depending on whether x is scalar or vector)
# If y is a matrix with ny>1 columns, ny must equal nx, and Nprime is the effective sample size
# for correlation of each column of x with same column of y (Nprime is a vector)
# Method. In univariate mode, if original sample size is N, effective sample size is
# Nprime = N(1-r1)/(1+r1), where r1 is the lag-1 autocorrelation
# Method. In bivariate mode, for pair of series, x and y, effective sample size is
# Nprime = N(1-r1r2)/(1+r1r2), where r1 is lag-1 autocorrelation of x and r2 is lag-1
# autocorrelation of y

source(paste(code_dir,"LagkAcc.R",sep="")) # optional transformation of flows
Lflag <-c(FALSE,FALSE) # initialize as no error flags
ErrorMessage <- "No problems"
Nprime <- NA

klag <-1 # will only need lag-1 autocorrelation

if (!all(complete.cases(x)) | (!all(is.na(y)) & !all(complete.cases(y)))){
# ERROR MESSAGE
Lflag[1]<-TRUE
ErrorMessage <- 'x or y contain a NA'
Output <- list(Lflag=Lflag,ErrorMessage=ErrorMessage,Nprime=Nprime)
return(Output)
}
if (is.vector(x)){
N <- length(x)
nx <-1
} else {
N <- dim(x)[1]
nx <- dim(x)[2]
}
if (all(is.na(y))){
# Univariate mode for effective sample size
ResTemp <- LagkAcc(x,klag)
a <- 1-ResTemp$rk
b <- 1+ResTemp$rk
f <- a/b
Nprime <- floor(f*N)
L <- ResTemp$rk <= 0
Nprime[L] <- N # if lag-1 r of either series lag-1 autocorr non-positive, Nprime equals N
} else {
# bivariate mode (e.g., for significane adjustment for correlation)
if (is.vector(y)){
if (nx ==1){
# both x and y are vectors
ResTemp<- rkGet(x,y,klag)
r1x <- ResTemp$r1x; r1y <- ResTemp$r1y
} else {
# y vector, x matrix
y = matrix(replicate(nx,y),nrow=N) # replicate y to same col-size as x
ResTemp<- rkGet(x,y,klag)
r1x <- ResTemp$r1x; r1y <- ResTemp$r1y
}
} else {
# y and x both matrix
ny <- dim(y)[2]
if (ny != nx){
# ERROR MESSAGE
Lflag[2]<-TRUE
ErrorMessage <- 'y is matrix and not same col-size as x'
Output <- list(Lflag=Lflag,ErrorMessage=ErrorMessage,Nprime=Nprime)
return(Output)
}
ResTemp<- rkGet(x,y,klag)
r1x <- ResTemp$r1x; r1y <- ResTemp$r1y
}
rr <- r1x * r1y # products of lag-1 autocorrelation
f <- (1-rr)/(1+rr)
Nprime <- floor(f*N)
L <- r1x <=0 | r1y <=0
Nprime[L] <- N
}

Output <- list(Nprime=Nprime,Lflag=Lflag,ErrorMessage=ErrorMessage)
}
rkGet <- function(x,y,klag){
ResTempx <- LagkAcc(x,klag)
ResTempy <- LagkAcc(y,klag)
r1x <- ResTempx$rk
r1y <- ResTempy$rk
Out1 <- list(r1x=r1x,r1y=r1y)
}
Loading

0 comments on commit dfec680

Please sign in to comment.