-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSleepDataGLM.R
57 lines (42 loc) · 1.89 KB
/
SleepDataGLM.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# Load the sleep-quality data set and show a quick overview of it.
# NOTE: the path below is machine-specific; adjust it when running elsewhere.
file_path <- "C:/Users/Liam/Desktop/Files/Code/GitHubRepos/SleepData/sleepdataGLM.csv"
data <- read.csv(file_path)
sleep_data <- as.data.frame(data)
# attach() is deliberately not called: every later step reaches columns through
# an explicit data frame (`data = train_data`, `test_data$SleepQuality`), and an
# attached copy would remain on the search path after the final rm() clean-up.
summary(sleep_data)
head(sleep_data)
nrow(sleep_data)
# ---------------------------------------------------------------------------
# Split the rows 65 / 35 into training and test sets, fit a binomial GLM on
# the training rows and predict response probabilities for the test rows
# ---------------------------------------------------------------------------
set.seed(123)  # fixed seed so the random split is reproducible

# Rows are sampled by row name; the test set is every row name not drawn.
train_index <- sample(x = row.names(sleep_data), size = .65 * nrow(sleep_data))
test_index <- row.names(sleep_data)[!(row.names(sleep_data) %in% train_index)]

train_data <- sleep_data[train_index, ]
test_data <- sleep_data[test_index, ]

# SleepQuality is modelled on all remaining columns of the training data.
model <- glm(formula = SleepQuality ~ ., family = binomial(), data = train_data)

# type = "response" returns predictions on the probability scale.
phat <- predict(model, newdata = test_data, type = "response")

# Fixed threshold value; it is not used by the fit or the prediction above.
alpha <- 0.8
### Creation of ROC (Receiver Operating Characteristic) Curve
# Sweep the classification threshold over (0, 1] in `num.intervals` steps. At
# each threshold an observation is predicted positive when phat >= threshold,
# and the four confusion-matrix counts are recorded:
#   TP: actual 1, predicted 1      FN: actual 1, predicted 0
#   FP: actual 0, predicted 1      TN: actual 0, predicted 0
num.intervals <- 1000
delta <- 1 / num.intervals
alpha.range <- seq(delta, num.intervals * delta, delta)

# The counts are filled by position (one entry per threshold). Indexing with
# `alpha * num.intervals` is avoided because that product is a floating-point
# value that can land just below the intended integer and then be truncated
# to the wrong slot. `na.rm = TRUE` leaves out rows without a prediction.
TP <- vapply(alpha.range, function(threshold) {
  sum(test_data$SleepQuality == 1 & phat >= threshold, na.rm = TRUE)
}, numeric(1))
FN <- vapply(alpha.range, function(threshold) {
  sum(test_data$SleepQuality == 1 & phat < threshold, na.rm = TRUE)
}, numeric(1))
FP <- vapply(alpha.range, function(threshold) {
  sum(test_data$SleepQuality == 0 & phat >= threshold, na.rm = TRUE)
}, numeric(1))
TN <- vapply(alpha.range, function(threshold) {
  sum(test_data$SleepQuality == 0 & phat < threshold, na.rm = TRUE)
}, numeric(1))

# False / true positive rates. The denominators are the numbers of actual
# negatives (FP + TN) and actual positives (TP + FN), which do not depend on
# the threshold.
normalized_FP <- FP / (FP + TN)
normalized_TP <- TP / (TP + FN)
plot(x = normalized_FP, y = normalized_TP, type = "l", xlim = c(0, 1),
     ylim = c(0, 1), xlab = "False Positive Rate (FPR)",
     ylab = "True Positive Rate (TPR)")

# The true positive rate is TP / (TP + FN). The precision TP / (TP + FP) is a
# different quantity and is not what the ROC curve or its area is built from.
TP_rate <- normalized_TP

# AUC: area under the (FPR, TPR) curve by the trapezoidal rule. The thresholds
# increase, so FPR runs from high to low; the end points (1, 1) (everything
# predicted positive) and (0, 0) (nothing predicted positive) close the curve.
# local() keeps the helper vectors out of the workspace.
AUC <- local({
  fpr <- c(1, normalized_FP, 0)
  tpr <- c(1, TP_rate, 0)
  widths <- fpr[-length(fpr)] - fpr[-1]
  heights <- (tpr[-length(tpr)] + tpr[-1]) / 2
  sum(widths * heights)
})
cat("AUC: ", AUC, "\n")
#-------------------------
# Clean-up: remove the script's working objects from the workspace.
rm(list = c(
  # data loading
  "file_path", "data", "sleep_data",
  # train / test split, model and predictions
  "train_index", "train_data", "test_index", "test_data", "model", "phat",
  # threshold sweep
  "alpha", "alpha.range", "delta", "num.intervals",
  # confusion counts, rates and AUC
  "TP", "TN", "FN", "FP", "normalized_FP", "normalized_TP", "TP_rate", "AUC"
))
#-------------------------