-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathUS_Education_Efficiency.Rmd
152 lines (132 loc) · 5.26 KB
/
US_Education_Efficiency.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
---
title: "US EDUCATION EFFICIENCY ANALYSIS"
output: html_notebook
---
# Install Packages
```{r}
#install.packages("tidyverse")
#install.packages("psych")
```
# Load Packages
```{r}
#library(tidyverse)
#library(readr)
#library(dplyr)
#library(scales)
#library(ggplot2)
#library(psych)
```
# Read dataset
```{r}
# Locate the CSV file. A `file_path` that is already defined (for example, set
# in the console before running the notebook) is used as-is; otherwise the
# user is asked to pick the file interactively.
if (!exists("file_path", mode = "character")) {
  if (!interactive()) {
    # file.choose() cannot open a dialog in a non-interactive run, so fail
    # with an actionable message instead.
    stop(
      "`file_path` is not set; assign the path of the scores CSV before ",
      "running this notebook non-interactively.",
      call. = FALSE
    )
  }
  file_path <- file.choose()
}

# Read the CSV file into R and preview the first rows
US_Scores <- read.csv(file_path)
head(US_Scores)
```
# Create a summary of the dataset to view all columns and components
```{r}
# Print summary statistics for every column of the loaded dataset
summary(US_Scores)
```
# Create a subset of various states for analysis
```{r}
# Keep only the rows belonging to five states picked from different regions
# of the country, then print the resulting subset.
Five_State_Scores <- subset(
  US_Scores,
  State.Name %in% c(
    "North Dakota",
    "California",
    "Florida",
    "Texas",
    "New York"
  )
)
Five_State_Scores
```
# Data Visualization of math SAT test scores
```{r}
# ggplot() is first called in this chunk, so ggplot2 has to be attached here;
# scales supplies pretty_breaks(). Install either package if it is missing,
# then attach it.
for (pkg in c("ggplot2", "scales")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
  library(pkg, character.only = TRUE)
}

# Math scores over time for the five selected states: the individual
# observations plus a smoothed trend per state, distinguished by both colour
# and line type. pretty_breaks() picks the x-axis break positions.
ggplot(
  data = Five_State_Scores,
  mapping = aes(x = Year, y = Total.Math, linetype = State.Name)
) +
  geom_point() +
  geom_smooth(mapping = aes(color = State.Name)) +
  scale_x_continuous(breaks = pretty_breaks()) +
  labs(
    title = "ND, FL, CA, TX, NY Math Scores",
    y = "Math Test Scores"
  )
```
# Data Visualization of verbal SAT test scores
```{r}
# Verbal scores over time for the five selected states: a smoothed trend per
# state is drawn first and the individual observations are plotted on top.
ggplot(Five_State_Scores, aes(Year, Total.Verbal, linetype = State.Name)) +
  geom_smooth(aes(colour = State.Name)) +
  geom_point() +
  scale_x_continuous(breaks = pretty_breaks()) +
  labs(
    y = "Verbal Test Scores",
    title = "ND, FL, CA, TX, NY Verbal Scores"
  )
```
# Examining the five selected states
```{r}
# Install the tidyverse if it is missing, then attach it (dplyr verbs, the
# pipe and ggplot2 are all used below).
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse)

# Average math and verbal score for each state and year. Computed once and
# shared by both charts below. `.groups = "drop"` returns an ungrouped result
# and silences the regrouping message summarise() would otherwise print.
state_year_avgs <- Five_State_Scores %>%
  group_by(State.Name, Year) %>%
  summarise(
    Avg_Math = mean(Total.Math, na.rm = TRUE),
    Avg_Verbal = mean(Total.Verbal, na.rm = TRUE),
    .groups = "drop"
  )

# Bar chart of the combined (math + verbal) average by state and year, with
# the states placed side by side within each year.
ggplot(
  state_year_avgs,
  aes(x = Year, y = Avg_Math + Avg_Verbal, fill = State.Name)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Average Math and Verbal Scores by State and Year",
    y = "Average Total Scores"
  ) +
  theme_minimal()

# Line graph of the same combined average, one line per state.
ggplot(
  state_year_avgs,
  aes(x = Year, y = Avg_Math + Avg_Verbal, color = State.Name)
) +
  # `linewidth` replaces the deprecated `size` argument for line thickness
  # (ggplot2 >= 3.4.0).
  geom_line(linewidth = 1.5) +
  labs(
    title = "Average Math and Verbal Scores by State and Year",
    y = "Average Total Scores"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 16),
    axis.title = element_text(face = "bold", size = 14),
    axis.text = element_text(size = 12),
    legend.title = element_text(face = "bold", size = 12),
    legend.text = element_text(size = 10),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_rect(fill = "#f0f0f0"),
    plot.background = element_rect(fill = "#f0f0f0"),
    legend.background = element_rect(fill = "#f0f0f0")
  )
```
```{r}
# Label each row with its year as a factor. `right = FALSE` makes the bins
# closed on the left ([2005, 2006), [2006, 2007), ...), so a year falls into
# the bin that carries its own label. With cut()'s default right-closed bins
# every year would be labelled one year early and 2005 would become NA.
year_labels <- as.character(2005:2015)
US_Scores$YearGroup <- cut(
  US_Scores$Year,
  breaks = seq(2005, 2016, by = 1),
  labels = year_labels,
  right = FALSE
)

# Mean of the female and male test-taker columns for each year
topscores <- US_Scores %>%
  group_by(YearGroup) %>%
  summarize(
    Avg_Female_TestTakers = mean(Gender.Female.Test.takers, na.rm = TRUE),
    Avg_Male_TestTakers = mean(Gender.Male.Test.takers, na.rm = TRUE)
  ) %>%
  ungroup()

# Reshape to one row per year and gender. Dodging only works between groups
# within a single layer, so both genders must come from the same column;
# `names_pattern` extracts "Female" / "Male" from the column names.
test_takers_long <- topscores %>%
  pivot_longer(
    cols = c(Avg_Female_TestTakers, Avg_Male_TestTakers),
    names_to = "Gender",
    names_pattern = "Avg_(.*)_TestTakers",
    values_to = "Avg_TestTakers"
  )

# Side-by-side bars per year, each labelled with its rounded value. The
# x-axis labels come straight from the YearGroup factor levels.
ggplot(
  test_takers_long,
  aes(x = YearGroup, y = Avg_TestTakers, fill = Gender)
) +
  geom_col(position = position_dodge(width = 0.7), width = 0.7) +
  geom_text(
    aes(label = round(Avg_TestTakers)),
    # same dodge width as the bars so each label sits above its own bar
    position = position_dodge(width = 0.7),
    vjust = -0.65,
    size = 3
  ) +
  labs(
    title = "Average Test-Takers",
    x = "Year Group",
    y = "Average Test-Takers"
  ) +
  scale_fill_manual(values = c("Female" = "red", "Male" = "blue")) +
  theme_minimal()
```