-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Dave Meko
committed
Oct 27, 2024
1 parent
b5359a0
commit dfec680
Showing
68 changed files
with
9,995 additions
and
0 deletions.
There are no files selected for viewing
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
CrossValid1<- function(X, y, nNeg, nPos, i2) { | ||
# Leave-m-out cross-validation of a previously estimated stepwise regression model. | ||
# D Meko | ||
# last revised 2024-03-08 | ||
# | ||
# IN | ||
# X is matrix of potential predictors | ||
# y is vector of predictand | ||
# nNeg and nPos are the maximum negative and positive lags that were allowed in | ||
# the stepwise regression model of predictand on chronologies. These settings | ||
# are used by function LeaveOut to compute m=1+4*max(nNeg,nPos), the nunber | ||
# of observations to be omitted in each iteration of leave-m-out cross- | ||
# validation | ||
# i2 is order that columns of matrix of potential predictors entered model | ||
# in calling script or function, which is assumed to have applied stepwise | ||
# regression; that order is repeated here in the stepwise cross-validation | ||
# | ||
# OUT | ||
# CV [named list] statistics for maximim-RE model from forward stepwise | ||
# Names self-describing. Includes step of maximum RE; columns of the original | ||
# matrix of potential predictors in the maximum-RE model; reduction of error | ||
# statistic, validation root-mean-square errot, cross-validation predictions, | ||
# cross-validation residuals of the final model; and number of observations | ||
# left out in leave-m-out cross-validation. Also in CV is the numerical | ||
# vector REcvAll of RE at each step of the modeling, and corresponding vector | ||
# RMSEVall or RMSE at each step | ||
# | ||
# NOTES | ||
# | ||
# Absolutely important that input column pointer i1 indicates columns of X | ||
# in order (left to right) as they entered stepwise in the regression assumed | ||
# to have been done before calling this function. | ||
# | ||
# revised 2024-03-08: cosmetic; expansion and clarification of comments | ||
|
||
library("pracma") # needed for emulation of Matlab "backslash" operator through | ||
# QR decomposition | ||
|
||
source(paste(code_dir,"LeaveOut.R",sep="")) # form pointer matrix for leave-m-out cross-validation | ||
|
||
|
||
#--- Build pointer matrix for predictor sets | ||
|
||
X<-as.matrix(X) | ||
mX <-dim(X)[1] | ||
y <- as.matrix(y) | ||
H<-LeaveOut(nNeg,nPos,mX) | ||
Lin <- H$Lin | ||
mOut <- H$NumberLeftOut | ||
|
||
|
||
#### Cross-validation modeling | ||
|
||
#... Storage for models for all steps | ||
E1 <- matrix(NA,mX,length(i2)) # to store cross-valid residuals for all models | ||
P1 <- E1 # to hold cross-validation predictions ... | ||
RMSEvAll <- rep(NA,length(i2)) # to store RMSEv for all models | ||
REall <- RMSEvAll # ... RE ... | ||
|
||
for (k in 1:length(i2)){ | ||
|
||
#--- Storage for various 1-col matrices specific to k-step model | ||
w1 <- matrix(NA,mX,1) # cv predictions | ||
w2 <- w1 # null predictions (equal to calibration means) | ||
ithis <- i2[1:k] | ||
|
||
#--- long-term predictor matrix | ||
Xthis <- as.matrix(X[,ithis]) | ||
a1this <- matrix(1,mX,1) | ||
Xthis <- cbind(a1this,Xthis) | ||
|
||
|
||
for (n in 1:mX){ | ||
|
||
#--- Build predictor matrix | ||
Lthis <- Lin[,n] | ||
Lthis <- as.logical(as.matrix(Lthis)) | ||
nthis <- sum(Lthis) | ||
u <- as.matrix(X[Lthis,ithis]) | ||
a1 <- matrix(1,nthis,1) | ||
U <- cbind(a1,u) # predictor matrix | ||
|
||
# Build predictand as 1-col matrix | ||
v <- as.matrix(y[Lthis]) | ||
|
||
#--- Matrix left division to estimate regression parameters | ||
b <- mldivide(U,v) # [matrix, 1 col, with coefficients, constant term first] | ||
|
||
#--- Estimated predictand for central "left-out" observation | ||
vhat1 <- Xthis[n,] %*% b | ||
w1[n] <- vhat1; | ||
w2[n] <- mean(v) | ||
|
||
} | ||
|
||
#--- Residuals time series | ||
e1<-y-w1 # cross-validation | ||
e2 <- y-w2 # null-model (using calib means as predictions) | ||
|
||
#--- Validaton statistics | ||
SSE1 <- sum(e1*e1) # sum of squares of cross-validation errors | ||
MSE1 <- SSE1/mX # mean square error of cross-validation | ||
RMSEv <- sqrt(MSE1) # root-mean-square error of cross-validation | ||
SSE2 <- sum(e2*e2) # sum of square of null-model residuals | ||
RE <- 1 - SSE1/SSE2 # reduction of error statistic | ||
|
||
#--- Store statistics for model step | ||
E1[,k] <- e1 | ||
P1[,k] <- w1 | ||
RMSEvAll[k]<- RMSEv | ||
REall[k] <-RE | ||
} | ||
#--- Find maximum-RE model and its statistics | ||
kmax <- which.max(REall) # at this step | ||
i2cv <- i2[1:kmax] | ||
REwinner <- REall[kmax] | ||
Pcv <- as.matrix(P1[,kmax]) # cv predictions | ||
Ecv <- as.matrix(E1[,kmax]) # cv errors | ||
RMSEcv <- RMSEvAll[kmax] | ||
|
||
|
||
CV <- list("REmaxStep"=kmax,"ColumnsIn"=i2cv,"REcvAll"=REall,"REcv"=REwinner, | ||
"CVpredictions"=Pcv,"CVresiduals"=Ecv,"RMSEvall"=RMSEvAll,"RMSEcv"=RMSEcv, | ||
"LeftOut"=mOut) | ||
return(CV) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
CrossValid2<- function(X, y, nNeg,nPos) { | ||
# Leave-m-out cross-validation of a regression model. | ||
# D Meko | ||
# last revised 20220104 | ||
# | ||
# IN | ||
# X is matrix of predictors | ||
# y is vector of predictand | ||
# nNeg and nPos are the maximum negative and positive lags that were considered when | ||
# the model was fit. | ||
# | ||
# OUT | ||
# Output [named list] cross-validation statistics: | ||
# REcv (1x1)r cross-validation reduction of error | ||
# CVpredictions [m 1 col]r cross-validation predictions | ||
# CVresidual [m, 1 col]r cross-validation residuals | ||
# RMSEcv (1x1)r root-mean-square error of cross-validation | ||
# LeftOut (1x1)i how many obs left out in each cross-validation model | ||
# | ||
# This is simplified from CrossValid1(), which handles various steps of a model | ||
# previously fit by forward stepwise regression. | ||
|
||
library("pracma") # needed for emulation of Matlab "backslash" operator through | ||
# QR decomposition | ||
|
||
source(paste(code_dir,"LeaveOut.R",sep="")) # form pointer matrix for leave-m-out cross-validation | ||
|
||
#--- Build pointer matrix for cross-validation predictor sets | ||
|
||
X<-as.matrix(X) | ||
mX <-dim(X)[1] | ||
y <- as.matrix(y) | ||
H<-LeaveOut(nNeg,nPos,mX) | ||
Lin <- H$Lin # logical pointer matrix; each col marks obs to use as 1 | ||
mOut <- H$NumberLeftOut | ||
|
||
|
||
#### Cross-validation modeling | ||
|
||
#--- Storage | ||
w1 <- matrix(NA,mX,1) # to hold cv predictions | ||
w2 <- w1 # to hold null predictions (equal to calibration means) | ||
|
||
#--- long-term predictor matrix, with 1's in first col | ||
a1this <- matrix(1,mX,1) | ||
Xthis <- cbind(a1this,X) | ||
|
||
|
||
for (n in 1:mX){ # Loop over observations | ||
|
||
#--- Build predictor matrix | ||
Lthis <- Lin[,n] | ||
Lthis <- as.logical(as.matrix(Lthis)) | ||
nthis <- sum(Lthis) | ||
u <- as.matrix(X[Lthis,]) | ||
a1 <- matrix(1,nthis,1) | ||
U <- cbind(a1,u) # predictor matrix, this cv model | ||
|
||
# Build predictand as 1-col matrix | ||
v <- as.matrix(y[Lthis]) | ||
|
||
#--- Matrix left division to estimate regression parameters | ||
b <- mldivide(U,v) # [matrix, 1 col, with coefficients, constant term first] | ||
|
||
#--- Estimated predictand for central "left-out" observation | ||
vhat1 <- Xthis[n,] %*% b | ||
w1[n] <- vhat1; | ||
w2[n] <- mean(v) | ||
} | ||
|
||
#--- Time series of residuals | ||
e1<-y-w1 # cross-validation | ||
e2 <- y-w2 # null-model (using calib means as predictions) | ||
|
||
#--- Validaton statistics | ||
SSE1 <- sum(e1*e1) # sum of squares of cross-validation errors | ||
MSE1 <- SSE1/mX # mean square error of cross-validation | ||
RMSEcv <- sqrt(MSE1) # root-mean-square error of cross-validation | ||
SSE2 <- sum(e2*e2) # sum of square of null-model residuals | ||
REcv <- 1 - SSE1/SSE2 # reduction of error statistic | ||
|
||
Output <- list("REcv"=REcv,"CVpredictions"=w1,"CVresiduals"=e1,"RMSEcv"=RMSEcv, | ||
"LeftOut"=mOut) | ||
return(Output) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
EffectSS <- function(x,y) { | ||
# D. Meko | ||
# Last revised 2022-09-09 | ||
# Effective sample size -- effective number of "independent" observations. | ||
# | ||
# Effective sample size, Nprime, is computed from the lag-1 autocorrelation of a time series | ||
# or pair of series. In univariate mode, Nprime can be applied for adjustment of significance | ||
# of univariate statistics (e.g., uncertainty of the sample mean or variance). In bivariate | ||
# mode, the effecitive sample size can be applied to adjust signficance of the correlation | ||
# coefficient for the two series. | ||
# | ||
#---IN | ||
# | ||
# x: time series matrix or vector without any NA; number of observations mx, number of series nx | ||
# y: ditto; but if passing vector and matrix, make sure y is the vector and x the matrix | ||
# | ||
#---OUT | ||
# | ||
# Output: list with fields | ||
# Nprime: scalar or vector of effective sample size | ||
# Lflag: flag (logical, length 2) | ||
# (1) x or y have at least one NA | ||
# (2) y is not vector and not same col-size as x | ||
# ErrorMessage [vector]c : error message associated with Lflag | ||
# | ||
#---NOTES | ||
# | ||
# If input argument y is NA, Nprime is computed in univariate mode: | ||
# If x is vector, Nprime is the scalar effective sample size | ||
# If x is a matrix, Nprime is the vector of the effective sample sizes of the individual series | ||
# If input argument y is not NA, Nprime is computed in bivariate mode: | ||
# If y is a vector, Nprime is the effective sample size for correlation of y with all | ||
# series in x (Nprime can be scalar or vector, depending on whether x is scalar or vector) | ||
# If y is a matrix with ny>1 columns, ny must equal nx, and Nprime is the effective sample size | ||
# for correlation of each column of x with same column of y (Nprime is a vector) | ||
# Method. In univariate mode, if original sample size is N, effective sample size is | ||
# Nprime = N(1-r1)/(1+r1), where r1 is the lag-1 autocorrelation | ||
# Method. In bivariate mode, for pair of series, x and y, effective sample size is | ||
# Nprime = N(1-r1r2)/(1+r1r2), where r1 is lag-1 autocorrelation of x and r2 is lag-1 | ||
# autocorrelation of y | ||
|
||
source(paste(code_dir,"LagkAcc.R",sep="")) # optional transformation of flows | ||
Lflag <-c(FALSE,FALSE) # initialize as no error flags | ||
ErrorMessage <- "No problems" | ||
Nprime <- NA | ||
|
||
klag <-1 # will only need lag-1 autocorrelation | ||
|
||
if (!all(complete.cases(x)) | (!all(is.na(y)) & !all(complete.cases(y)))){ | ||
# ERROR MESSAGE | ||
Lflag[1]<-TRUE | ||
ErrorMessage <- 'x or y contain a NA' | ||
Output <- list(Lflag=Lflag,ErrorMessage=ErrorMessage,Nprime=Nprime) | ||
return(Output) | ||
} | ||
if (is.vector(x)){ | ||
N <- length(x) | ||
nx <-1 | ||
} else { | ||
N <- dim(x)[1] | ||
nx <- dim(x)[2] | ||
} | ||
if (all(is.na(y))){ | ||
# Univariate mode for effective sample size | ||
ResTemp <- LagkAcc(x,klag) | ||
a <- 1-ResTemp$rk | ||
b <- 1+ResTemp$rk | ||
f <- a/b | ||
Nprime <- floor(f*N) | ||
L <- ResTemp$rk <= 0 | ||
Nprime[L] <- N # if lag-1 r of either series lag-1 autocorr non-positive, Nprime equals N | ||
} else { | ||
# bivariate mode (e.g., for significane adjustment for correlation) | ||
if (is.vector(y)){ | ||
if (nx ==1){ | ||
# both x and y are vectors | ||
ResTemp<- rkGet(x,y,klag) | ||
r1x <- ResTemp$r1x; r1y <- ResTemp$r1y | ||
} else { | ||
# y vector, x matrix | ||
y = matrix(replicate(nx,y),nrow=N) # replicate y to same col-size as x | ||
ResTemp<- rkGet(x,y,klag) | ||
r1x <- ResTemp$r1x; r1y <- ResTemp$r1y | ||
} | ||
} else { | ||
# y and x both matrix | ||
ny <- dim(y)[2] | ||
if (ny != nx){ | ||
# ERROR MESSAGE | ||
Lflag[2]<-TRUE | ||
ErrorMessage <- 'y is matrix and not same col-size as x' | ||
Output <- list(Lflag=Lflag,ErrorMessage=ErrorMessage,Nprime=Nprime) | ||
return(Output) | ||
} | ||
ResTemp<- rkGet(x,y,klag) | ||
r1x <- ResTemp$r1x; r1y <- ResTemp$r1y | ||
} | ||
rr <- r1x * r1y # products of lag-1 autocorrelation | ||
f <- (1-rr)/(1+rr) | ||
Nprime <- floor(f*N) | ||
L <- r1x <=0 | r1y <=0 | ||
Nprime[L] <- N | ||
} | ||
|
||
Output <- list(Nprime=Nprime,Lflag=Lflag,ErrorMessage=ErrorMessage) | ||
} | ||
rkGet <- function(x,y,klag){ | ||
ResTempx <- LagkAcc(x,klag) | ||
ResTempy <- LagkAcc(y,klag) | ||
r1x <- ResTempx$rk | ||
r1y <- ResTempy$rk | ||
Out1 <- list(r1x=r1x,r1y=r1y) | ||
} |
Oops, something went wrong.