From 50a0c71661ccb3979a2e967025915a4b01bca4ff Mon Sep 17 00:00:00 2001 From: apigap01 Date: Wed, 2 Oct 2024 15:10:16 +0100 Subject: [PATCH 1/4] Active travel to school 2023 update --- Active travel to school.R | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/Active travel to school.R b/Active travel to school.R index 3197159..812ac7a 100644 --- a/Active travel to school.R +++ b/Active travel to school.R @@ -5,24 +5,20 @@ # They usually sent across without having to request as it's a regular request - ### 2. Packages/dependencies ------ source("./1.indicator_analysis.R") -library(readxl) -library(janitor) -library(purrr) - +library(purrr) #for applying functions across elements of a list +library(rio) #for reading in Excel data across multiple sheets ### 3. Clean data ------ # filepath -path <- paste0(data_folder,"Received Data/Copy of Hands Up Scotland data for ScotPHO_2008 to 2022.xlsx") - +path <- paste0(data_folder,"Received Data/Active Travel to School/Hands_up_Scotland.xlsx") # get name of sheets sheet <- excel_sheets(path) - +sheet <- sheet[-c(1, 18)] #dropping contents page and footnotes # read in data from each sheet and apply sheet names as a df column # this is because each years data is on a seperate tab @@ -32,12 +28,12 @@ data <- lapply(setNames(sheet, sheet), 10:12, # active travel primary school 18:20, # active travel secondary school 45, # total survey respondents primary school - 47)]) %>% # total survey respondents secondary school + 47)]) |> # total survey respondents secondary school clean_names() # convert columns to class numeric, except local authority column -data <- map(data, ~ .x %>% +data <- map(data, ~ .x |> mutate(across(-`Local Authority`, as.numeric))) @@ -46,9 +42,9 @@ data <- bind_rows(data, .id="Sheet") # calculate numerator and denominator -data <- data %>% - mutate(numerator = rowSums(select(., contains(c("Walk", "Cycle", "Scooter")))), - denominator = rowSums(select(., contains(c("Responses")))), +data <- data |> + mutate(numerator = rowSums(select(data, contains(c("Walk", "Cycle", "Scooter")))), + denominator = rowSums(select(data, contains(c("Responses")))), year = str_sub(Sheet, start = 2)) @@ -56,22 +52,22 @@ data <- data %>% la_lookup <- readRDS(paste0(lookups, "Geography/CAdictionary.rds")) -data <- data %>% +data <- data |> mutate(`Local Authority` = str_replace(`Local Authority`, "&","and"), `Local Authority` = str_replace(`Local Authority`, "Eilean Siar","Na h-Eileanan Siar"), `Local Authority` = str_replace(`Local Authority`, "Edinburgh City","City of Edinburgh") - ) %>% - left_join(la_lookup, by = c("Local Authority" = "areaname")) %>% + ) |> + left_join(la_lookup, by = c("Local Authority" = "areaname")) |> rename(ca = code) # select final columns -data <- data %>% +data <- data |> select(ca, year, numerator, denominator) # drop N/A rows -data <- data %>% +data <- data |> filter(if_any(c(ca, numerator, denominator), complete.cases)) @@ -83,7 +79,7 @@ saveRDS(data, paste0(data_folder, "Prepared Data/active_travel_to_school_raw.rds ### 4. Run analysis functions ------ analyze_first(filename = "active_travel_to_school", geography = "council", - measure = "percent", yearstart = 2008, yearend = 2022, time_agg = 1) + measure = "percent", yearstart = 2008, yearend = 2023, time_agg = 1) analyze_second(filename = "active_travel_to_school", measure = "percent", time_agg = 1, From 08246929448a04fe3bd278c1477defbcd68d233b Mon Sep 17 00:00:00 2001 From: apigap01 Date: Wed, 2 Oct 2024 15:14:14 +0100 Subject: [PATCH 2/4] Neighbourhood perceptions redevelopment and 2022 update --- Neighbourhood perceptions.R | 266 +++++++++++++++++++----------------- 1 file changed, 138 insertions(+), 128 deletions(-) diff --git a/Neighbourhood perceptions.R b/Neighbourhood perceptions.R index facdbd3..59359ba 100644 --- a/Neighbourhood perceptions.R +++ b/Neighbourhood perceptions.R @@ -1,16 +1,12 @@ -################################################################################ -################################################################################ -######### ######### -##### Neighbourhood perceptions ##### -######### ######### -################################################################################ -################################################################################ +############################################. +## Analyst notes ---- +############################################. # This script covers three indicators: # -# - People perceiving rowdy behaviour very/fairly common in their neighbourhood -# - Adults rating neighbourhood as very good place to live -# - Perception of drug misuse in neighbourhood +# - People perceiving rowdy behaviour very/fairly common in their neighbourhood (4115) +# - Adults rating neighbourhood as very good place to live (20903) +# - Perception of drug misuse in neighbourhood (4203) # # Data is sourced from the Scottish Household Survey - contact # Hannah.Wolfram@gov.scot @@ -33,143 +29,158 @@ # The data just needs to be formatted to match the last updates format # (e.g /Shiny Data/4203 Perception drug misuse_shiny) # -# Section 4 - Checks includes only a very rudimentary check of this years update +# Checks section includes only a very rudimentary check of this years update # against last years, grouping by year -# Libraries --------------------------------------------------------------- +###############################################. +## Filepaths/Functions/Lookups/Packages ---- +###############################################. +source("1.indicator_analysis.R") #functions not actually used - quicker way to load packages in -library(dplyr) -library(stringr) -library(stringr) -library(janitor) +filepath <- paste0(data_folder, "Received Data/Neighbourhood perceptions/Final tables 2024.xlsx") #setting filepath -source("1.indicator_analysis.R") +library(rio) #used for reading in data from various sheets -# 1. Read in data ------------------------------------------------------------ - -# a) Received data: -neighbour <- read.csv(paste0(data_folder,"Received Data/Neighbourhood perceptions 2023/", - "SHS perception of drug missuse neighbourhood rating", - " perception of rowdiness.csv")) |> - clean_names() - -# b) Area lookups to match codes to given geographies -# The ADP needs to be read in and matched to the data separately as there -# are some local authorities that have the same name. +# ADP lookup needs to be read in and matched to the data separately as there +# are some local authorities that have the same name. area_codes <- readRDS(paste0(data_folder,"Lookups/Geography/codedictionary.rds")) |> - filter(str_detect(code, "S00|S12|S00|S08")) + filter(str_detect(code, "S00|S12|S08|S11")) area_codes_adp <- readRDS(paste0(data_folder,"Lookups/Geography/codedictionary.rds")) |> filter(str_detect(code, "S11")) -# 2. Data manipulation ------------------------------------------------------- - -# a) Join ADP data with area code lookup (fixing instances where names differ) -# select only relevant columns - -# NB: some instances where numerator is ".". Is it safe to assume these are 0? -# NA's in a numeric vector will be incorrectly handled. - -neighbourADP <- neighbour |> - filter(geography_type == "Alcohol & Drug Partnership") |> - left_join(area_codes_adp, by = c("geography" = "areaname")) |> - mutate(code = case_when(geography == "MALDEP" ~ "S11000051", - geography == "Lanarkshire ADP" ~ "S11000052", - .default = code)) - -# b) Join the remaining area data (excluding the ADP) to neighbour ADP -neighbour2 <- neighbour |> - filter(geography_type != "Alcohol & Drug Partnership") |> - # Fix issues with geography names - mutate(geography = case_when(str_detect(geography, "&") ~ str_replace(geography, "&", "and"), - geography_type == "Health Board" ~ paste("NHS",geography), - .default = geography), - geography = case_match(geography, "Edinburgh, City of" ~ "City of Edinburgh", - "Eilean Siar"~ "Na h-Eileanan Siar", - .default = geography), - geography = case_when(geography_type == "Health Board" & str_detect(geography, "NHS Orkney Islands") ~ "NHS Orkney", - geography_type == "Health Board" & str_detect(geography, "NHS Shetland Islands") ~ "NHS Shetland", - .default = geography))|> - full_join(area_codes, by = c("geography" = "areaname")) |> - bind_rows(neighbourADP) |> - mutate(code = case_when(geography == "MALDEP" ~ "S11000051", - geography== "Lanarkshire ADP" ~ "S11000052", - .default = code)) |> - # select only the columns used in the shiny data (excluding ind_id which is - # added later) and then rename them - # - # columns in shiny data: - # code ind_id year numerator rate lowci upci def_period trend_axis - select(indicator, code, year, percentage, lower_95_ci, upper_95_ci) |> - rename(def_period = year, - rate = percentage, - lowci = lower_95_ci, - upci = upper_95_ci) |> - # create remaining columns for extract: - mutate(year = as.numeric(case_when(def_period == "2007-2008" ~ "2007", - def_period == "2009-2010" ~ "2009", - .default = def_period)), - trend_axis = case_when(def_period == "2007-2008" ~ "2007/2008", - def_period == "2009-2010" ~ "2009/2009", - .default = def_period), - def_period = case_when(def_period == "2007-2008" ~ "2007 to 2008 survey years; 2-year aggregates", - def_period == "2009-2010" ~ "2009 to 2010 survey years; 2-year aggregates", - .default = paste(def_period,"survey year")), - numerator = "NA") - -test<- neighbour2 |> - filter(is.na(rate)) - - -# 3. Final indicator data sets ------------------------------------------- - -# a) 4115 data for People perceiving rowdy behaviour very/fairly common -# in their neighbourhood -rowdy <- neighbour2 |> - filter(str_detect(indicator, "rowdy"))|> - mutate(ind_id = 4115) |> - select(code, ind_id, year, numerator, rate, lowci, upci, def_period, trend_axis) - -saveRDS(rowdy, file = paste0(data_folder, "Data to be checked/perceiving_rowdy_behaviour_shiny.rds")) -write.csv(rowdy, file = paste0(data_folder, "Data to be checked/perceiving_rowdy_behaviour_shiny.csv"),row.names = F) - - -# c) 20903 data for Adults rating neighbourhood as very good place to live -very_good <- neighbour2 |> - filter(str_detect(indicator, "very good"))|> - mutate(ind_id = 20903) |> - select(code, ind_id, year, numerator, rate, lowci, upci, def_period, trend_axis) - -saveRDS(very_good, file = paste0(data_folder, "Data to be checked/adults_rating_neighbourhood_very_good_shiny.rds")) -write.csv(very_good, file = paste0(data_folder, "Data to be checked/adults_rating_neighbourhood_very_good_shiny.csv"),row.names = F) - - -# d) 4203 Perception of drug misuse in neighbourhood -drug_misuse <- neighbour2 |> - filter(str_detect(indicator, "drug")) |> - mutate(ind_id = 4203) |> - select(code, ind_id, year, numerator, rate, lowci, upci, def_period, trend_axis) +###############################################. +## Read in all data ---- +###############################################. + +all_data <- import_list(filepath) #from rio package, converts each sheets into a df within a list +all_data<- all_data[-c(1,2)] #drop cover page and contents + +###############################################. +## Create function for cleaning data ---- +###############################################. + +indicator_cleaning <- function(id, scot_df, hb_df, adp_df = NULL, ca_df = NULL, area_codes, area_codes_adp){ + + #scotland dfs + scot_df <- scot_df |> + row_to_names(row_number = 1) |> #set first row as headings + mutate(areatype = c("Scotland")) |> #create areatype variable and set to Scotland + mutate(areaname = c("Scotland")) |> #create areaname variable and set to Scotland + left_join(filter(area_codes, str_detect(code, "S00"))) |> #create code column from lookup + select(areaname, everything()) #makes area name column first in line with other areatypes + + #hb dfs + hb_df <- hb_df |> + row_to_names(row_number = 1) |> + mutate(areatype = c("Health board")) |> + rename(areaname = `NHS Board`) |> + mutate(areaname = case_when(areaname == "Orkney Islands" ~ "Orkney", + areaname == "Shetland Islands" ~ "Shetland", + .default = areaname)) |> #removing "Islands" from Orkney and Shetland + mutate(areaname = paste("NHS", areaname)) |> #paste NHS on HB names to match lookup + left_join(filter(area_codes, str_detect(code, "S08"))) #joining with lookup + + #function can only take adp OR ca, not both, + # and produces 1 df containing whichever of ca/adp is passed into function + if(is.null(adp_df)){ + ca_adp_df <- ca_df |> + row_to_names(row_number = 1) |> + mutate(areatype = c("Council area")) |> + rename(areaname = `Local authority`) |> + mutate(areaname = case_when(str_detect(areaname, "&") ~ str_replace(areaname, "&", "and"), #replace all & with "and" + areaname == "Edinburgh, City of" ~ "City of Edinburgh", + .default = areaname)) |> + left_join(filter(area_codes, str_detect(code, "S12"))) + } else { + ca_adp_df <- adp_df |> + row_to_names(row_number = 1) |> + mutate(areatype = c("Alcohol & drug partnership")) |> + rename(areaname = `Alcohol & Drug Partnership`) |> + left_join(area_codes_adp) |> + mutate(code = case_when(areaname == "MALDEP" ~ "S11000051", + areaname == "Lanarkshire ADP" ~ "S11000052", + .default = code)) + } + + #combine scot, hb and adp/ca dfs + cleaned_df <- rbind(scot_df, hb_df, ca_adp_df) + + cleaned_df <- cleaned_df |> + clean_names() |> #clean col names + mutate_at(c(3:6), as.numeric) |> #convert columns with data to numeric + mutate(across(where(is.numeric), round, 1)) |> #round to 1dp + mutate(ind_id = id, #create indicator id col based on argument to function + numerator = "NA", #create numerator + def_period = year, #duplicate year column to create trend axis col + trend_axis = case_when(def_period == "2007-2008" ~ "2007/2008", #create trend axis col + def_period == "2009-2010" ~ "2009/2010", + .default = def_period), + def_period = case_when(def_period == "2007-2008" ~ "2007 to 2008 survey years; 2-year aggregates", + def_period == "2009-2010" ~ "2009 to 2010 survey years; 2-year aggregates", + .default = paste(def_period,"survey year")), #create def_period col + year = substr(year, 1, 4), #keep only first year for multi-year rows + year = as.integer(year)) |> + select(code, ind_id, year, numerator, percent, lower_95_percent_ci, upper_95_percent_ci, def_period, trend_axis) |> #drop unnecessary cols + rename(rate = percent, #rename cols to align with shiny data + lowci = lower_95_percent_ci, + upci = upper_95_percent_ci) +} + + +###############################################. +## Run function for drug misuse (4203) ---- +###############################################. + +drug_misuse <- indicator_cleaning(id = "4203" , + all_data$Table_1, all_data$Table_2, all_data$Table_3, + area_codes = area_codes, area_codes_adp = area_codes_adp) saveRDS(drug_misuse, file = paste0(data_folder, "Data to be checked/perception_drug_misuse_shiny.rds")) write.csv(drug_misuse, file = paste0(data_folder, "Data to be checked/perception_drug_misuse_shiny.csv"),row.names = F) -# 4. Checks --------------------------------------------------------------- +###############################################. +## Run function for rowdy behaviour (4115) ------ +###############################################. + +rowdy_behaviour <- indicator_cleaning(id = "4115", + all_data$Table_4, all_data$Table_5, all_data$Table_6, + area_codes = area_codes, area_codes_adp = area_codes_adp) + +saveRDS(rowdy_behaviour, file = paste0(data_folder, "Data to be checked/perceiving_rowdy_behaviour_shiny.rds")) +write.csv(rowdy_behaviour, file = paste0(data_folder, "Data to be checked/perceiving_rowdy_behaviour_shiny.csv"),row.names = F) + +###############################################. +## Run function for neighbourhood good place (20903) ---- +###############################################. + +good_place <- indicator_cleaning(id = "20903", + all_data$Table_7, all_data$Table_8, ca_df = all_data$Table_9, + area_codes = area_codes, area_codes_adp = area_codes_adp) + +saveRDS(good_place, file = paste0(data_folder, "Data to be checked/adults_rating_neighbourhood_very_good_shiny.rds")) +write.csv(good_place, file = paste0(data_folder, "Data to be checked/adults_rating_neighbourhood_very_good_shiny.csv"),row.names = F) + + +############################################. +## Analyst notes ---- +############################################. ## As there is no analyse_second function used for these indicators, run the following to ## check data against last years (may need to change the file names) -# a) Read in last years data -last_year_rowdy <- read.csv(paste0(data_folder, "Shiny Data/4115 Rowdy behaviour_shiny.csv")) -last_year_very_good <- read.csv(paste0(data_folder, "Shiny Data/20903_Neighbourhood_rating_shiny.csv")) +# Read in last years data +last_year_rowdy_behaviour <- read.csv(paste0(data_folder, "Shiny Data/4115 Rowdy behaviour_shiny.csv")) +last_year_good_place <- read.csv(paste0(data_folder, "Shiny Data/20903_Neighbourhood_rating_shiny.csv")) last_year_drug_misuse <- read.csv(paste0(data_folder, "Shiny Data/4203 Perception drug misuse_shiny.csv")) -# b) function to check totals of shared years +# Function to check totals of shared years check_year_totals <- function(last_year_data, this_year_data){ last_year_max <- as.numeric(max(last_year_data$year)) - + last_year <- last_year_data |> group_by(year) |> summarize(last_year_sum = sum(rate)) |> @@ -197,12 +208,11 @@ check_year_totals <- function(last_year_data, this_year_data){ if(test$test_column[1] == 0) { print("All totals match") - - + } else { non_match <- filter(both_years, check != 0) - + return(non_match) print("Totals don't match. See non_match dataframe.") } @@ -210,6 +220,6 @@ check_year_totals <- function(last_year_data, this_year_data){ # Check totals -check_year_totals(last_year_data = last_year_rowdy, this_year_data = rowdy) -check_year_totals(last_year_data = last_year_very_good, this_year_data = very_good) -check_year_totals(last_year_data = last_year_drug_misuse, this_year_data = drug_misuse) +check_year_totals(last_year_data = last_year_rowdy_behaviour, this_year_data = rowdy_behaviour) +check_year_totals(last_year_data = last_year_good_place, this_year_data = good_place) +check_year_totals(last_year_data = last_year_drug_misuse, this_year_data = drug_misuse) \ No newline at end of file From d95ef32e11230da25efd908d7d19ebeded620723 Mon Sep 17 00:00:00 2001 From: apigap01 Date: Wed, 2 Oct 2024 15:18:29 +0100 Subject: [PATCH 3/4] Single adult dwellings 2023 update --- Single Adult Dwellings.R | 73 ++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 36 deletions(-) diff --git a/Single Adult Dwellings.R b/Single Adult Dwellings.R index bf74cc8..3bc1841 100644 --- a/Single Adult Dwellings.R +++ b/Single Adult Dwellings.R @@ -16,25 +16,25 @@ source("2.deprivation_analysis.R") # deprivation function #Reading data in directly from website that was manually downloaded for previous program -col_names_n <- c("datazone", "name", 2006:2022) -col_names_d <- c("datazone", "name", 2001:2022) +col_names_n <- c("datazone", "name", 2006:2023) +col_names_d <- c("datazone", "name", 2001:2023) #read data in direct from source sad_data_extract <- bind_rows(read_csv("https://statistics.gov.scot/slice/observations.csv?&dataset=http%3A%2F%2Fstatistics.gov.scot%2Fdata%2Fhousehold-estimates&http%3A%2F%2Fpurl.org%2Flinked-data%2Fcube%23measureType=http%3A%2F%2Fstatistics.gov.scot%2Fdef%2Fmeasure-properties%2Fcount&http%3A%2F%2Fstatistics.gov.scot%2Fdef%2Fdimension%2Findicator%28dwellings%29=http%3A%2F%2Fstatistics.gov.scot%2Fdef%2Fconcept%2Findicator-dwellings%2Fwith-single-adult-discounts", - skip=8, col_names = col_names_n) %>% mutate(type = "numerator"), - read_csv("https://statistics.gov.scot/slice/observations.csv?&dataset=http%3A%2F%2Fstatistics.gov.scot%2Fdata%2Fhousehold-estimates&http%3A%2F%2Fpurl.org%2Flinked-data%2Fcube%23measureType=http%3A%2F%2Fstatistics.gov.scot%2Fdef%2Fmeasure-properties%2Fcount&http%3A%2F%2Fstatistics.gov.scot%2Fdef%2Fdimension%2Findicator%28dwellings%29=http%3A%2F%2Fstatistics.gov.scot%2Fdef%2Fconcept%2Findicator-dwellings%2Ftotal-dwellings", - skip = 8, col_names = col_names_d) %>% mutate(type = "denominator")) #%>% - #janitor::clean_names() + skip=8, col_names = col_names_n) |> mutate(type = "numerator"), + read_csv("https://statistics.gov.scot/slice/observations.csv?&dataset=http%3A%2F%2Fstatistics.gov.scot%2Fdata%2Fhousehold-estimates&http%3A%2F%2Fpurl.org%2Flinked-data%2Fcube%23measureType=http%3A%2F%2Fstatistics.gov.scot%2Fdef%2Fmeasure-properties%2Fcount&http%3A%2F%2Fstatistics.gov.scot%2Fdef%2Fdimension%2Findicator%28dwellings%29=http%3A%2F%2Fstatistics.gov.scot%2Fdef%2Fconcept%2Findicator-dwellings%2Ftotal-dwellings", + skip = 8, col_names = col_names_d) |> mutate(type = "denominator")) #|> +#janitor::clean_names() sad_data_extract <-subset(sad_data_extract, select = -c (name, `2006`, `2001`:`2005`)) -sad_data_extract_format <- sad_data_extract %>% +sad_data_extract_format <- sad_data_extract |> mutate (datazone=gsub("http://statistics.gov.scot/id/statistical-geography/","", datazone)) #filter to only datazone level sad_data_extract_format <- filter(sad_data_extract_format,substr(datazone,1,3)=="S01") #pivot_longer years to one column -sad_data_extract_pivot <- sad_data_extract_format %>% - pivot_longer(cols = c(`2007`:`2022`), names_to = "year", values_to = "count") %>% -#pivot_wider type to two different columns for numerator and denominator - pivot_wider(names_from = type, values_from = count) %>% +sad_data_extract_pivot <- sad_data_extract_format |> + pivot_longer(cols = c(`2007`:`2023`), names_to = "year", values_to = "count") |> + #pivot_wider type to two different columns for numerator and denominator + pivot_wider(names_from = type, values_from = count) |> filter(!is.na(denominator)) @@ -43,30 +43,31 @@ saveRDS(sad_data_extract_pivot, file=paste0(data_folder, 'Prepared Data/Single_D #### Match lookup - datazone with local authority # dz01 Lookup file for CA -dz01_lookup <- readRDS('/conf/linkage/output/lookups/Unicode/Deprivation/DataZone2001_all_simd.rds')%>% - setNames(tolower(names(.))) %>% #variables to lower case - select(ca2019, datazone2001) - # #Dealing with changes in ca, hb and hscp codes. Transforms old code versions into 2019 ones - #mutate(ca2011 = recode(ca2011, "S12000015"='S12000047', "S12000024"='S12000048', - # "S12000046"='S12000049', "S12000044"='S12000050')) +dz01_lookup <- readRDS('/conf/linkage/output/lookups/Unicode/Deprivation/DataZone2001_all_simd.rds')|> + clean_names() |> #variables to lower case + select(ca2019, data_zone2001) |> + rename(datazone2001 = data_zone2001) +# #Dealing with changes in ca, hb and hscp codes. Transforms old code versions into 2019 ones +#mutate(ca2011 = recode(ca2011, "S12000015"='S12000047', "S12000024"='S12000048', +# "S12000046"='S12000049', "S12000044"='S12000050')) # \\Isdsf00d03\cl-out\lookups\Unicode\Geography\DataZone2011 #Preparing file for CA for period 2007 to 2014 (2014 only including dz <= S01006505) -sad01_data <- sad_data_extract_pivot %>% filter(year<=2014) +sad01_data <- sad_data_extract_pivot |> filter(year<=2014) #Merging with lookup -sad01_data <- left_join(sad01_data, dz01_lookup, by = c("datazone" = "datazone2001")) %>% - rename(ca = ca2019) %>% filter(datazone<='S01006505') %>% mutate(dz = "dz01") +sad01_data <- left_join(sad01_data, dz01_lookup, by = c("datazone" = "datazone2001")) |> + rename(ca = ca2019) |> filter(datazone<='S01006505') |> mutate(dz = "dz01") -dz11_lookup <- readRDS('/conf/linkage/output/lookups/Unicode/Deprivation/DataZone2011_simd2020v2.rds')%>% - setNames(tolower(names(.))) %>% #variables to lower case - select(ca2019, datazone2011) +dz11_lookup <- readRDS('/conf/linkage/output/lookups/Unicode/Deprivation/DataZone2011_simd2020v2.rds')|> + clean_names() |> #variables to lower case + select(ca2019, datazone2011) #|> #Preparing file for CA for period 2014 to 2017 (2014 only including dz > S01006505) -sad11_data <- sad_data_extract_pivot %>% filter(year>=2014) +sad11_data <- sad_data_extract_pivot |> filter(year>=2014) #Merging with lookup -sad11_data <- left_join(sad11_data, dz11_lookup, by = c("datazone" = "datazone2011")) %>% - rename(ca = ca2019) %>% filter(datazone>'S01006505') %>% mutate(dz = "dz11") +sad11_data <- left_join(sad11_data, dz11_lookup, by = c("datazone" = "datazone2011")) |> + rename(ca = ca2019) |> filter(datazone>'S01006505') |> mutate(dz = "dz11") # Merge dz01 & dz11 data into single file (Basefile) sad_data_raw <- full_join(sad01_data, sad11_data) @@ -77,20 +78,20 @@ sad_data_raw <- full_join(sad01_data, sad11_data) #### Prepare / Aggregate for specified geographies - LA, DZ11 & base IRs # Prepare / Aggregate by la -sadla_data_raw <- sad_data_raw %>% - group_by(ca, year, dz) %>% - summarise_at(c("numerator", "denominator"), sum, na.rm =T) %>% - filter(dz != "dz01" | year != "2014") %>% ungroup() +sadla_data_raw <- sad_data_raw |> + group_by(ca, year, dz) |> + summarise_at(c("numerator", "denominator"), sum, na.rm =T) |> + filter(dz != "dz01" | year != "2014") |> ungroup() sadla_data_raw <- select(sadla_data_raw,-c(dz)) saveRDS(sadla_data_raw, file=paste0(data_folder, 'Prepared Data/Single_Dwellings_LA_raw.rds')) # Prepare / Aggregate by dz11 -sad11_data_raw <- sad_data_raw %>% - group_by(datazone, year, dz) %>% - summarise_at(c("numerator", "denominator"), list(sum), na.rm =T) %>% - filter(dz == "dz11") %>% ungroup() +sad11_data_raw <- sad_data_raw |> + group_by(datazone, year, dz) |> + summarise_at(c("numerator", "denominator"), list(sum), na.rm =T) |> + filter(dz == "dz11") |> ungroup() sad11_data_raw <- select(sad11_data_raw,-c(dz)) @@ -105,7 +106,7 @@ analyze_first(filename = "Single_Dwellings_LA", geography = "council", measure = yearstart = 2007, yearend = 2013, time_agg = 1) analyze_first(filename = "Single_Dwellings_dz11", geography = "datazone11", measure = "percent", - yearstart = 2014, yearend = 2022, time_agg = 1) + yearstart = 2014, yearend = 2023, time_agg = 1) # Merging CA, DZ11 together and save both periods together all_data <- rbind(readRDS(paste0(data_folder, "Temporary/Single_Dwellings_LA_formatted.rds")), @@ -118,7 +119,7 @@ analyze_second(filename = "Single_Dwellings_all", measure = "percent", time_agg #Deprivation analysis function analyze_deprivation(filename="Single_Dwellings_depr", measure="percent", time_agg=1, - yearstart= 2007, yearend=2022, year_type = "calendar", + yearstart= 2007, yearend=2023, year_type = "calendar", ind_id = 20504) ##END From 98c65863110c19ea63869c51b519b4b9ef3c00f8 Mon Sep 17 00:00:00 2001 From: Monica McGibbon Date: Tue, 8 Oct 2024 15:43:27 +0100 Subject: [PATCH 4/4] re-run deprivation function --- Single Adult Dwellings.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Single Adult Dwellings.R b/Single Adult Dwellings.R index 3bc1841..c5e9038 100644 --- a/Single Adult Dwellings.R +++ b/Single Adult Dwellings.R @@ -119,7 +119,7 @@ analyze_second(filename = "Single_Dwellings_all", measure = "percent", time_agg #Deprivation analysis function analyze_deprivation(filename="Single_Dwellings_depr", measure="percent", time_agg=1, - yearstart= 2007, yearend=2023, year_type = "calendar", + yearstart= 2007, yearend=2022, year_type = "calendar", ind_id = 20504) ##END