-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathDataSummarizationInR.R
107 lines (84 loc) · 2.92 KB
/
DataSummarizationInR.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# Functions for summarizing data frames, demonstrated on mtcars:
#   table()     - frequency of occurrence of each category of a variable
#                 (or of each combination of categories of several variables).
#   xtabs()     - cross-tabulation of categorical variables given as a formula.
#   aggregate() - splits the data into subsets and computes a summary
#                 statistic for each subset.
#
mtcars
# View() opens a GUI data viewer, so it is only called in an interactive
# session; this lets the script also run in batch mode.
if (interactive()) {
  View(mtcars)
}
#table-------
# one-way frequency counts
table(mtcars$cyl)
table(mtcars$gear)
# two-way counts: cyl in rows, gear in columns
table(mtcars$cyl, mtcars$gear)
# e.g. exactly 1 car has cyl = 4 and gear = 3
# dnn supplies the dimension names shown in the printed table
table(mtcars$cyl, mtcars$gear, dnn = c("Cyl", "Gear"))
# three-way counts: one cyl (rows) x gear (columns) matrix per carb value,
# e.g. the last slice is carb = 8
table(mtcars$cyl, mtcars$gear, mtcars$carb)
# Cross-tabulation through a formula interface
#xtabs------
help("xtabs")
# two-way counts: cyl in rows, gear in columns
xtabs(~ cyl + gear, data = mtcars)
# three-way counts: one cyl-by-gear table per carb value
xtabs(~ cyl + gear + carb, data = mtcars)
#aggregate-------
?aggregate
# Formula method. The formula is passed as the unnamed first argument:
# since R 4.2.0 that argument of the formula method is called `x`, so
# spelling it `formula = ...` fails there, while the positional form
# works in every R version.
# Left-hand side = variable(s) to summarize, right-hand side = grouping.
#mean of mpg wrt gear types
aggregate(mpg ~ gear, data = mtcars, FUN = mean)
#mean of mpg & hp wrt gear types (cbind() puts several variables on the LHS)
aggregate(cbind(mpg, hp) ~ gear, data = mtcars, FUN = mean)
#mean of mpg wrt gear & cyl types
aggregate(mpg ~ gear + cyl, data = mtcars, FUN = mean)
#mean of mpg, hp & wt wrt gear & cyl types
aggregate(cbind(mpg, hp, wt) ~ gear + cyl, data = mtcars, FUN = mean)
# The "table" family: table(), addmargins(), margin.table(), prop.table()
mtcars
str(mtcars)
#table-------
# one-way counts of cars per cylinder count
table(mtcars[["cyl"]])
# two-way counts with unnamed dimensions
table(mtcars[["cyl"]], mtcars[["gear"]])
# same counts with the dimensions named cyl and gear, stored as t1
t1 <- table(cyl = mtcars[["cyl"]], gear = mtcars[["gear"]])
t1
#margin table------------
# margin.table() sums the cell counts over a margin of the table
help("margin.table")
# no margin given: a single total over all cells
margin.table(t1)
# margin 1 = rows: one total per cyl
margin.table(t1, margin = 1)
# margin 2 = columns: one total per gear
margin.table(t1, margin = 2)
#addmargins---------
# addmargins() extends the table with summaries computed over its margins.
# The FUN expressions are written inline in each call.
# NOTE(review): addmargins() appears to label the added margins from the
# FUN expression as written - confirm before moving FUN into a variable.
help("addmargins")
# FUN omitted, so the default (sum) is used: column sums appended as a row
addmargins(t1, margin = 1)
# mean of each row appended as a column
addmargins(t1, margin = 2, FUN = mean)
# sums appended along both rows and columns
addmargins(t1, margin = c(1, 2), FUN = sum)
# two functions supplied for the two margins
addmargins(t1, margin = c(1, 2), FUN = c(sum, mean))
# a separate list of functions for each margin:
# first list (sum, mean, length, mean) for margin 1, second (sd, sum) for margin 2
# NOTE(review): mean is listed twice in the first list - presumably
# unintended; confirm.
addmargins(t1, margin = c(1, 2),
           FUN = list(list(sum, mean, length, mean), list(sd, sum)))
#prop.table-------
help("prop.table")
# prop.table() turns cell counts into proportions
t1
# no margin: each cell as a share of the grand total
prop.table(t1)
# cyl 4 / gear 3: 1 car out of all 32 = 1/32
# margin 1: shares within each row, so every row sums to 1
prop.table(t1, margin = 1)
t1
# cyl 4 / gear 3: 1 of the 11 cyl-4 cars = 1/11
# margin 2: shares within each column, so every column sums to 1
prop.table(t1, margin = 2)
# cyl 4 / gear 3: 1 of the 15 gear-3 cars = 1/15
# proportions as percentage strings, arranged in a 3-column character matrix
matrix(paste0(prop.table(t1) * 100, "%"), ncol = 3)
# quick check: the row-wise proportions sum to 1 per row ...
rowSums(prop.table(t1, margin = 1))
#rowSums(prop.table(t1, margin=2))
# ... and the column-wise proportions sum to 1 per column
colSums(prop.table(t1, margin = 2))
#matrix used in the sections below----
# m1 is created before its first use. 1:12 fills the 3 x 4 matrix exactly,
# so no values are recycled and no warning is raised.
m1 <- matrix(1:12, nrow = 3)
m1
#transpose----
# t() swaps rows and columns: the 3 x 4 matrix becomes 4 x 3
t(m1)
m1
#sweep----
# sweep() applies FUN between the matrix and STATS along MARGIN;
# STATS needs one value per row (MARGIN = 1) or per column (MARGIN = 2).
# rowwise: add 2, 3, 4 to rows 1, 2, 3
sweep(m1, MARGIN = 1, STATS = c(2, 3, 4), FUN = "+")
# colwise: multiply columns 1..4 by 2, 3, 4, 5
sweep(m1, MARGIN = 2, STATS = c(2, 3, 4, 5), FUN = "*")
#addmargins----
# FUN is written inline so the function names are visible in each call
# margin 1: column sums appended as an extra row
addmargins(m1, margin = 1, FUN = sum)
# margin 2: row means appended as an extra column
addmargins(m1, margin = 2, FUN = mean)
# mean appended along both rows and columns
addmargins(m1, margin = c(1, 2), FUN = mean)
# a separate list of functions per margin: (mean, sum) for margin 1,
# (var, sd) for margin 2
addmargins(m1, margin = c(1, 2), FUN = list(list(mean, sum), list(var, sd)))