Skip to content

Latest commit

 

History

History
154 lines (134 loc) · 5.46 KB

File metadata and controls

154 lines (134 loc) · 5.46 KB

US_Education_Curriculum_Analysis

A project providing evidence on how Big Data Analytics be used to provide insight into curriculum superiority and reformation amongst the nationally scoring US States.


title: "US EDUCATION EFFICIENCY ANALYSIS" output: html_notebook

Install Packages

#install.packages("tidyverse")
#install.packages("psych")

Library Packages

#library(tidyverse)
#library(readr)
#library(dplyr)
#library(scales)
#library(ggplot2)
#library(psych)

Read dataset

#Choose the CSV file interactively
#file_path <- file.choose()

#Read the CSV file into R
US_Scores <- read.csv(file_path)

head(US_Scores)

Create a summary of the dataset to view all columns and components

summary(US_Scores)

Create a subset of various states for analysis

# The states chosen for this subset are from various regions of the country

Five_State_Scores <- subset(US_Scores,State.Name == "North Dakota" |
                                      State.Name == "California" |
                                      State.Name == "Florida" | 
                                      State.Name == "Texas" |
                                      State.Name == "New York")
Five_State_Scores

Data Visualization of math SAT test scores

# Install and load the 'scales' package if not already installed
if (!requireNamespace("scales", quietly = TRUE)) {
  install.packages("scales")
}

# Load the 'scales' package
library(scales)

# Your ggplot code with the scale_x_continuous modification
ggplot(data = Five_State_Scores, mapping = aes(x = Year, y = Total.Math, linetype = State.Name)) +
  geom_point() +
  geom_smooth(mapping = aes(color = State.Name)) +
  scale_x_continuous(breaks = pretty_breaks()) +
  labs(title = "ND, FL, CA, TX, NY Math Scores",
       y = "Math Test Scores")

Data Visualization of verbal SAT test scores

ggplot( data = Five_State_Scores, mapping = aes(x = Year, y = Total.Verbal, linetype = State.Name)) +
        geom_smooth(mapping = aes(color = State.Name)) +
        geom_point() +
        scale_x_continuous(breaks = pretty_breaks()) +
        labs(title = "ND, FL, CA, TX, NY Verbal Scores",
             y = "Verbal Test Scores")

Examining the top 5 states

# Install and load the required packages if not already installed
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}

# Load the 'tidyverse' package
library(tidyverse)

# Create a bar chart for average math and verbal scores by state
Five_State_Scores %>%
  group_by(State.Name, Year) %>%
  summarise(Avg_Math = mean(Total.Math, na.rm = TRUE),
            Avg_Verbal = mean(Total.Verbal, na.rm = TRUE)) %>%
  ggplot(aes(x = Year, y = Avg_Math + Avg_Verbal, fill = State.Name)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Average Math and Verbal Scores by State and Year",
       y = "Average Total Scores") +
  theme_minimal()

# Create a line graph for average math and verbal scores by state
Five_State_Scores %>%
  group_by(State.Name, Year) %>%
  summarise(Avg_Math = mean(Total.Math, na.rm = TRUE),
            Avg_Verbal = mean(Total.Verbal, na.rm = TRUE)) %>%
  ggplot(aes(x = Year, y = Avg_Math + Avg_Verbal, color = State.Name)) +
  geom_line(size = 1.5) +  # Adjust line thickness
  labs(title = "Average Math and Verbal Scores by State and Year",
       y = "Average Total Scores") +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 16),
    axis.title = element_text(face = "bold", size = 14),
    axis.text = element_text(size = 12),
    legend.title = element_text(face = "bold", size = 12),
    legend.text = element_text(size = 10),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_rect(fill = "#f0f0f0"),
    plot.background = element_rect(fill = "#f0f0f0"),
    legend.background = element_rect(fill = "#f0f0f0")
  )
# Grouping data by Year to then calculate the average test-takers scores for each gender
US_Scores$YearGroup <- cut(US_Scores$Year, 
                            breaks = seq(2005, 2016, by = 1),
                            labels = c("2005", "2006", "2007", "2008", "2009", "2010", "2011", "2012", "2013", "2014", "2015"))

# Creating the average test scores for both Male and Female test scores
topscores <- US_Scores %>%
  group_by(YearGroup) %>%
  summarize(
    Avg_Female_TestTakers = mean(Gender.Female.Test.takers, na.rm = TRUE),
    Avg_Male_TestTakers = mean(Gender.Male.Test.takers, na.rm = TRUE)
  ) %>%
  ungroup()

# Creating a bar chart for data visualization
ggplot(topscores, aes(x = YearGroup)) +
  geom_bar(aes(y = Avg_Female_TestTakers, fill = "Female"), stat = "identity", position = "dodge", width = 0.7) +
  geom_bar(aes(y = Avg_Male_TestTakers, fill = "Male"), stat = "identity", position = "dodge", width = 0.7) +
  labs(title = "Average Test-Takers", x = "Year Group", y = "Average Test-Takers") +
  scale_fill_manual(values = c("Female" = "red", "Male" = "blue")) +
  theme_minimal() +
  geom_text(aes(x = YearGroup, y = round(Avg_Female_TestTakers), label = round(Avg_Female_TestTakers)), 
            position = position_dodge(width = 0.7), vjust = 1.65, size = 3) +
  geom_text(aes(x = YearGroup, y = round(Avg_Male_TestTakers), label = round(Avg_Male_TestTakers)), 
            position = position_dodge(width = 0.7), vjust = -0.65, size = 3) +
  scale_x_discrete(labels = c("2005", "2006", "2007", "2008", "2009", "2010", "2011", "2012", "2013", "2014", "2015"))