-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathLinear&MultipleRegression.R
107 lines (88 loc) · 2.67 KB
/
Linear&MultipleRegression.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# Regression ----
# Load the student regression data set. The path below is machine-specific;
# adjust it when running the script on another computer.
df <- read.csv("F:/R Tool Directory/Choudhary R/stud_reg.csv", header = TRUE)
# View() opens the data viewer, so only call it in an interactive session.
if (interactive()) {
  View(df)
}
df

# Single Linear Regression ----
# Model APPLICANTS as a linear function of PLACE_RATE.
result <- lm(APPLICANTS ~ PLACE_RATE, data = df)
summary(result)
# Original columns side by side with each row's fitted value and residual.
data_fitted <- data.frame(
  df,
  fitted.values = fitted(result),
  residual = resid(result)
)
data_fitted

# Multiple Regression ----
# Same response, now with NO_GRAD_STUD added as a second predictor.
# `result` and `data_fitted` are overwritten with the two-predictor fit.
result <- lm(APPLICANTS ~ PLACE_RATE + NO_GRAD_STUD, data = df)
summary(result)
data_fitted <- data.frame(
  df,
  fitted.values = fitted(result),
  residual = resid(result)
)
data_fitted
# Exercise ----
# Twelve consecutive ages (18 through 29) and the matching height readings.
age <- 18:29
age
hieght <- c(
  76.1, 77, 78.1, 78.2, 78.8, 79.7,
  79.9, 81.1, 81.2, 81.8, 82.8, 83.5
)
hieght
df <- data.frame(age = age, hieght = hieght)
df
# Scatter plot: hieght on the x axis, age on the y axis.
plot(age ~ hieght)
# Simple linear regression of age on hieght.
result <- lm(age ~ hieght)
summary(result)
# Fitted equation: Age = -100.84 + 1.55 * Hieght
# Accuracy:
# R2 is close to 1, so the fitted line describes the data very well.
# Data with each row's fitted value and residual attached.
data_fitted <- data.frame(
  df,
  fitted.value = fitted(result),
  residual = resid(result)
)
data_fitted
# Same relationship drawn with ggplot2, with the least-squares line overlaid.
library("ggplot2")
g <- ggplot(df, aes(x = hieght, y = age)) +
  geom_point() +
  geom_smooth(method = "lm")
print(g)
# Exercise2 ----
names(mtcars)
# View() opens the data viewer, so only call it in an interactive session.
if (interactive()) {
  View(mtcars)
}
# Visual check of mpg against each candidate predictor.
plot(mpg ~ hp, data = mtcars)
plot(mpg ~ wt, data = mtcars)
result <- lm(mpg ~ hp + wt, data = mtcars)
summary(result)
# Adjusted R2 is about 0.81 (multiple R2 about 0.83), i.e. roughly 81% of the
# variance in mpg is accounted for by hp and wt after adjusting for the
# number of predictors.

# Checking Multicollinearity ----
# A larger model whose predictors are likely to overlap with each other.
result <- lm(mpg ~ hp + wt + disp + cyl + gear, data = mtcars)
summary(result)
# install.packages("usdm")
library(usdm)
# VIF describes how well each predictor is explained by the OTHER predictors,
# so the response (mpg) must not be part of the input.
predictors <- mtcars[, names(mtcars) != "mpg"]
vif(predictors)
# Drop disp, then disp and cyl, and re-check the remaining predictors.
vif(predictors[, !(names(predictors) %in% "disp")])
vif(predictors[, !(names(predictors) %in% c("disp", "cyl"))])
# Return to the compact two-predictor model.
result <- lm(mpg ~ hp + wt, data = mtcars)
summary(result)
# Example-3 ----
# Create Training and Test data ----
# Fix the RNG seed so the 80/20 split (and every number derived from it)
# is the same on each run.
set.seed(123)
n_obs <- nrow(mtcars)
# Row indices for training data: 80% of the rows, rounded down.
trainingRowIndex <- sample(seq_len(n_obs), floor(0.8 * n_obs))
trainingData <- mtcars[trainingRowIndex, ] # model training data
testData <- mtcars[-trainingRowIndex, ] # test data

# Bi-variate Analysis ----
# Checking relationships between different variables
pairs(mtcars)
# Correlation
cr <- cor(mtcars)
cr
library(corrplot)
corrplot(cr, type = "lower", method = "circle")
corrplot(cr, type = "lower", method = "number")

# Build the model on training data ----
# disp and hp are left out of the model (they were commented out of the
# original formula); only cyl and wt are used.
lmMod <- lm(mpg ~ cyl + wt, data = trainingData) # build the model
# Review diagnostic measures
summary(lmMod) # model summary
# Accuracy:
# Read R2 from the summary above; the closer it is to 1, the better the fit
# on the training rows. The exact value depends on the sampled split.

# Prediction ----
# Predicting values for the test dataset
testData$mpgPred <- predict(lmMod, newdata = testData)
# View() opens the data viewer, so only call it in an interactive session.
if (interactive()) {
  View(testData)
}

# Accuracy ----
# Determining prediction accuracy on the test dataset using MAPE
# MAPE (Mean Absolute Percentage Error): mean(|predicted - actual| / actual).
# The lower its value, the better the accuracy of the model.
# MAPE calculated by hand. Stored as mape_manual so the variable does not
# share a name with the Metrics::mape() function used below.
mape_manual <- mean(abs(testData$mpgPred - testData$mpg) / testData$mpg)
mape_manual
# MAPE using the mape function from Metrics
# install.packages("Metrics")
library(Metrics)
# Metrics::mape() expects (actual, predicted) in that order; naming the
# arguments keeps the division by the actual values, matching mape_manual.
mape_metrics <- mape(actual = testData$mpg, predicted = testData$mpgPred)
mape_metrics