From 50a0c71661ccb3979a2e967025915a4b01bca4ff Mon Sep 17 00:00:00 2001
From: apigap01 <abigail.prescott@phs.scot>
Date: Wed, 2 Oct 2024 15:10:16 +0100
Subject: [PATCH 1/4] Active travel to school 2023 update

---
 Active travel to school.R | 34 +++++++++++++++-------------------
 1 file changed, 15 insertions(+), 19 deletions(-)

diff --git a/Active travel to school.R b/Active travel to school.R
index 3197159..812ac7a 100644
--- a/Active travel to school.R	
+++ b/Active travel to school.R	
@@ -5,24 +5,20 @@
 # They usually sent across without having to request as it's a regular request
 
 
-
 ### 2. Packages/dependencies ------
 source("./1.indicator_analysis.R")
-library(readxl)
-library(janitor)
-library(purrr)
-
+library(purrr) #for applying functions across elements of a list
+library(rio) #for reading in Excel data across multiple sheets
 
 
 ### 3. Clean data ------
 
 # filepath 
-path <- paste0(data_folder,"Received Data/Copy of Hands Up Scotland data for ScotPHO_2008 to 2022.xlsx")
-
+path <- paste0(data_folder,"Received Data/Active Travel to School/Hands_up_Scotland.xlsx")
 
 # get name of sheets
 sheet <- excel_sheets(path)
-
+sheet <- sheet[-c(1, 18)] #dropping contents page and footnotes
 
 # read in data from each sheet and apply sheet names as a df column 
 # this is because each years data is on a seperate tab
@@ -32,12 +28,12 @@ data <- lapply(setNames(sheet, sheet),
                                                                    10:12, # active travel primary school
                                                                    18:20, # active travel secondary school
                                                                    45, # total survey respondents primary school
-                                                                   47)]) %>%  # total survey respondents secondary school
+                                                                   47)]) |>  # total survey respondents secondary school
   clean_names()
 
 
 # convert columns to class numeric, except local authority column
-data <- map(data, ~ .x %>%
+data <- map(data, ~ .x |>
               mutate(across(-`Local Authority`, as.numeric)))
 
 
@@ -46,9 +42,9 @@ data <- bind_rows(data, .id="Sheet")
 
 
 # calculate numerator and denominator
-data <- data %>%
-  mutate(numerator = rowSums(select(., contains(c("Walk", "Cycle", "Scooter")))),
-         denominator = rowSums(select(., contains(c("Responses")))),
+data <- data |>
+  mutate(numerator = rowSums(select(data, contains(c("Walk", "Cycle", "Scooter")))),
+         denominator = rowSums(select(data, contains(c("Responses")))),
          year = str_sub(Sheet, start = 2))
 
 
@@ -56,22 +52,22 @@ data <- data %>%
 la_lookup <- readRDS(paste0(lookups, "Geography/CAdictionary.rds"))
 
 
-data <- data %>%
+data <- data |>
   mutate(`Local Authority` = str_replace(`Local Authority`, "&","and"),
          `Local Authority` = str_replace(`Local Authority`, "Eilean Siar","Na h-Eileanan Siar"),
          `Local Authority` = str_replace(`Local Authority`, "Edinburgh City","City of Edinburgh")
-  ) %>%
-  left_join(la_lookup, by = c("Local Authority" = "areaname")) %>%
+  ) |>
+  left_join(la_lookup, by = c("Local Authority" = "areaname")) |>
   rename(ca = code)
 
 
 # select final columns 
-data <- data %>% 
+data <- data |> 
   select(ca, year, numerator, denominator)
 
 
 # drop N/A rows
-data <- data %>%
+data <- data |>
   filter(if_any(c(ca, numerator, denominator), complete.cases))
 
 
@@ -83,7 +79,7 @@ saveRDS(data, paste0(data_folder, "Prepared Data/active_travel_to_school_raw.rds
 
 ### 4. Run analysis functions ------
 analyze_first(filename = "active_travel_to_school", geography = "council", 
-              measure = "percent", yearstart = 2008, yearend = 2022, time_agg = 1)
+              measure = "percent", yearstart = 2008, yearend = 2023, time_agg = 1)
 
 
 analyze_second(filename = "active_travel_to_school", measure = "percent", time_agg = 1,

From 08246929448a04fe3bd278c1477defbcd68d233b Mon Sep 17 00:00:00 2001
From: apigap01 <abigail.prescott@phs.scot>
Date: Wed, 2 Oct 2024 15:14:14 +0100
Subject: [PATCH 2/4] Neighbourhood perceptions redevelopment and 2022 update

---
 Neighbourhood perceptions.R | 266 +++++++++++++++++++-----------------
 1 file changed, 138 insertions(+), 128 deletions(-)

diff --git a/Neighbourhood perceptions.R b/Neighbourhood perceptions.R
index facdbd3..59359ba 100644
--- a/Neighbourhood perceptions.R	
+++ b/Neighbourhood perceptions.R	
@@ -1,16 +1,12 @@
-################################################################################
-################################################################################
-#########                                                              #########
-#####                      Neighbourhood perceptions                       #####
-#########                                                              #########
-################################################################################
-################################################################################
+############################################.
+## Analyst notes ----
+############################################.
 
 # This script covers three indicators:
 # 
-# - People perceiving rowdy behaviour very/fairly common in their neighbourhood
-# - Adults rating neighbourhood as very good place to live
-# - Perception of drug misuse in neighbourhood
+# - People perceiving rowdy behaviour very/fairly common in their neighbourhood (4115)
+# - Adults rating neighbourhood as very good place to live (20903)
+# - Perception of drug misuse in neighbourhood (4203)
 #  
 #  Data is sourced from the Scottish Household Survey - contact
 #   Hannah.Wolfram@gov.scot
@@ -33,143 +29,158 @@
 #  The data just needs to be formatted to match the last updates format 
 #  (e.g /Shiny Data/4203 Perception drug misuse_shiny)
 #  
-#  Section 4 - Checks includes only a very rudimentary check of this years update
+#  The Checks section includes only a very rudimentary check of this year's update
 #  against last years, grouping by year
 
 
-# Libraries ---------------------------------------------------------------
+###############################################.
+## Filepaths/Functions/Lookups/Packages ----
+###############################################.
+source("1.indicator_analysis.R") #functions not actually used - quicker way to load packages in 
 
-library(dplyr)
-library(stringr)
-library(stringr)
-library(janitor)
+filepath <- paste0(data_folder, "Received Data/Neighbourhood perceptions/Final tables 2024.xlsx") #setting filepath
 
-source("1.indicator_analysis.R") 
+library(rio) #used for reading in data from various sheets
 
-# 1. Read in data ------------------------------------------------------------
-
-# a) Received data:
-neighbour <- read.csv(paste0(data_folder,"Received Data/Neighbourhood perceptions 2023/",
-                             "SHS perception of drug missuse neighbourhood rating",
-                             " perception of rowdiness.csv")) |> 
-  clean_names()
-
-# b) Area lookups to match codes to given geographies
-# The ADP needs to be read in and matched to the data separately as there 
-# are some local authorities that have the same name. 
+# The ADP lookup is read in and matched to the data separately because some 
+# ADPs share a name with a local authority, so names alone are ambiguous.
 area_codes <- readRDS(paste0(data_folder,"Lookups/Geography/codedictionary.rds")) |> 
-  filter(str_detect(code, "S00|S12|S00|S08"))
+  filter(str_detect(code, "S00|S12|S08|S11"))
 
 area_codes_adp <- readRDS(paste0(data_folder,"Lookups/Geography/codedictionary.rds")) |> 
   filter(str_detect(code, "S11"))
 
-# 2. Data manipulation -------------------------------------------------------
-
-# a) Join ADP data with area code lookup (fixing instances where names differ)
-# select only relevant columns 
- 
-# NB: some instances where numerator is ".". Is it safe to assume these are 0? 
-# NA's in a numeric vector will be incorrectly handled. 
-
-neighbourADP <- neighbour |> 
-  filter(geography_type == "Alcohol & Drug Partnership") |> 
-  left_join(area_codes_adp, by = c("geography" = "areaname")) |> 
-  mutate(code = case_when(geography == "MALDEP" ~ "S11000051",
-                          geography == "Lanarkshire ADP" ~ "S11000052",
-                          .default = code)) 
-
-# b) Join the remaining area data (excluding the ADP) to neighbour ADP
-neighbour2 <- neighbour |>  
-  filter(geography_type != "Alcohol & Drug Partnership") |> 
-  # Fix issues with geography names
-  mutate(geography = case_when(str_detect(geography, "&") ~ str_replace(geography, "&", "and"),
-                               geography_type == "Health Board" ~ paste("NHS",geography),
-                               .default = geography),
-        geography = case_match(geography, "Edinburgh, City of" ~ "City of Edinburgh",
-                               "Eilean Siar"~ "Na h-Eileanan Siar",
-                               .default = geography),
-         geography = case_when(geography_type == "Health Board" & str_detect(geography, "NHS Orkney Islands") ~ "NHS Orkney",
-                              geography_type == "Health Board" & str_detect(geography, "NHS Shetland Islands") ~ "NHS Shetland",
-                              .default = geography))|> 
-  full_join(area_codes, by = c("geography" = "areaname")) |> 
-  bind_rows(neighbourADP) |> 
-  mutate(code = case_when(geography == "MALDEP" ~ "S11000051",
-                   geography== "Lanarkshire ADP" ~ "S11000052",
-                   .default = code)) |> 
-  # select only the columns used in the shiny data (excluding ind_id which is 
-  # added later) and then rename them
-  # 
-  # columns in shiny data:
-  # code	ind_id	year	numerator rate	lowci	upci	def_period	trend_axis
-  select(indicator, code, year, percentage, lower_95_ci, upper_95_ci) |> 
-  rename(def_period = year,  
-         rate = percentage, 
-         lowci = lower_95_ci,
-         upci = upper_95_ci) |> 
-  # create remaining columns for extract:
-  mutate(year = as.numeric(case_when(def_period == "2007-2008" ~ "2007",
-                          def_period == "2009-2010" ~ "2009",
-                          .default = def_period)),
-         trend_axis = case_when(def_period == "2007-2008" ~ "2007/2008",
-                                def_period == "2009-2010" ~ "2009/2009",
-                                .default = def_period),
-         def_period = case_when(def_period == "2007-2008" ~ "2007 to 2008 survey years; 2-year aggregates",
-                                def_period == "2009-2010" ~ "2009 to 2010 survey years; 2-year aggregates",
-                                .default = paste(def_period,"survey year")),
-         numerator = "NA")
-
-test<- neighbour2 |> 
-  filter(is.na(rate))
-
-
-# 3. Final indicator data sets -------------------------------------------
-
-# a) 4115 data for People perceiving rowdy behaviour very/fairly common
-#  in their neighbourhood 
-rowdy <- neighbour2 |> 
-  filter(str_detect(indicator, "rowdy"))|> 
-  mutate(ind_id = 4115) |> 
-  select(code, ind_id, year, numerator, rate,	lowci,	upci,	def_period,	trend_axis)
-
-saveRDS(rowdy, file = paste0(data_folder, "Data to be checked/perceiving_rowdy_behaviour_shiny.rds"))
-write.csv(rowdy, file = paste0(data_folder, "Data to be checked/perceiving_rowdy_behaviour_shiny.csv"),row.names = F)
-
-
-# c) 20903 data for Adults rating neighbourhood as very good place to live
-very_good <- neighbour2 |> 
-  filter(str_detect(indicator, "very good"))|> 
-  mutate(ind_id = 20903) |> 
-  select(code, ind_id, year, numerator, rate,	lowci,	upci,	def_period,	trend_axis)
-
-saveRDS(very_good, file = paste0(data_folder, "Data to be checked/adults_rating_neighbourhood_very_good_shiny.rds"))
-write.csv(very_good, file = paste0(data_folder, "Data to be checked/adults_rating_neighbourhood_very_good_shiny.csv"),row.names = F)
-
-
-# d) 4203 Perception of drug misuse in neighbourhood
-drug_misuse <- neighbour2 |> 
-  filter(str_detect(indicator, "drug")) |> 
-  mutate(ind_id = 4203) |> 
-  select(code, ind_id, year, numerator, rate,	lowci,	upci,	def_period,	trend_axis)
+###############################################.
+## Read in all data ----
+###############################################.
+
+all_data <- import_list(filepath) #from rio package, converts each sheets into a df within a list
+all_data<- all_data[-c(1,2)] #drop cover page and contents
+
+###############################################.
+## Create function for cleaning data ----
+###############################################.
+
+# Cleans one indicator's raw sheets (Scotland, NHS board, plus ADP or council) into shiny format.
+# id: value written to ind_id; scot_df/hb_df: Scotland and NHS board sheets; adp_df/ca_df: ADP or
+# council sheet (pass one); area_codes/area_codes_adp: geography lookups supplying `code`.
+# Returns a df with: code, ind_id, year, numerator, rate, lowci, upci, def_period, trend_axis.
+indicator_cleaning <- function(id, scot_df, hb_df, adp_df = NULL, ca_df = NULL, area_codes, area_codes_adp){
+  stopifnot("supply adp_df or ca_df" = !is.null(adp_df) || !is.null(ca_df)) #clear error if neither sheet is passed
+  
+  #scotland dfs
+  scot_df <- scot_df |> 
+    row_to_names(row_number = 1) |>  #set first row as headings
+    mutate(areatype = "Scotland", areaname = "Scotland") |> #create areatype and areaname, both set to Scotland
+    left_join(filter(area_codes, str_detect(code, "S00"))) |> #create code column from lookup
+    select(areaname, everything()) #makes area name column first in line with other areatypes
+  
+  #hb dfs
+  hb_df <- hb_df |> 
+    row_to_names(row_number = 1) |> 
+    mutate(areatype = "Health board") |> 
+    rename(areaname = `NHS Board`) |> 
+    mutate(areaname = case_when(areaname == "Orkney Islands" ~ "Orkney", 
+                                areaname == "Shetland Islands" ~ "Shetland",
+                                .default = areaname), #removing "Islands" from Orkney and Shetland 
+           areaname = paste("NHS", areaname)) |> #paste NHS on HB names to match lookup
+    left_join(filter(area_codes, str_detect(code, "S08")))  #joining with lookup
+  
+  #produces 1 df from whichever of ca/adp is passed in (adp_df is used if both are given)
+  if(is.null(adp_df)){ 
+    ca_adp_df <- ca_df |> 
+      row_to_names(row_number = 1) |> 
+      mutate(areatype = "Council area") |> 
+      rename(areaname = `Local authority`) |>
+      mutate(areaname = case_when(str_detect(areaname, "&") ~ str_replace(areaname, "&", "and"), #swap first & for "and"
+                                  areaname == "Edinburgh, City of" ~ "City of Edinburgh", 
+                                  .default = areaname)) |> 
+      left_join(filter(area_codes, str_detect(code, "S12")))
+  } else {
+    ca_adp_df <- adp_df |> 
+      row_to_names(row_number = 1) |> 
+      mutate(areatype = "Alcohol & drug partnership") |> 
+      rename(areaname = `Alcohol & Drug Partnership`) |>
+      left_join(area_codes_adp) |>
+      mutate(code = case_when(areaname == "MALDEP" ~ "S11000051", #set codes by hand for these two ADP names
+                              areaname == "Lanarkshire ADP" ~ "S11000052",
+                              .default = code))  
+  }
+  
+  #combine scot, hb and adp/ca dfs, then reshape to the shiny columns (last value is returned visibly)
+  rbind(scot_df, hb_df, ca_adp_df) |> 
+    clean_names() |> #clean col names
+    mutate(across(3:6, as.numeric)) |> #convert columns with data to numeric (replaces superseded mutate_at)
+    mutate(across(where(is.numeric), \(x) round(x, 1))) |>  #round to 1dp; lambda avoids deprecated across(...) args
+    mutate(ind_id = id, #create indicator id col based on argument to function
+           numerator = "NA", #placeholder text "NA" (a string, not a missing value)
+           def_period = year, #copy of year, used to build trend_axis and def_period below
+           trend_axis = case_when(def_period == "2007-2008" ~ "2007/2008", #create trend axis col
+                                  def_period == "2009-2010" ~ "2009/2010",
+                                  .default = def_period),
+           def_period = case_when(def_period == "2007-2008" ~ "2007 to 2008 survey years; 2-year aggregates",
+                                  def_period == "2009-2010" ~ "2009 to 2010 survey years; 2-year aggregates",
+                                  .default = paste(def_period,"survey year")), #create def_period col 
+           year = as.integer(substr(year, 1, 4))) |>  #keep only first year for multi-year rows
+    select(code, ind_id, year, numerator, percent, lower_95_percent_ci, upper_95_percent_ci, def_period, trend_axis) |> #drop unnecessary cols 
+    rename(rate = percent, #rename cols to align with shiny data
+           lowci = lower_95_percent_ci,
+           upci = upper_95_percent_ci) 
+}
+
+
+###############################################.
+## Run function for drug misuse (4203) ----
+###############################################.
+
+drug_misuse <- indicator_cleaning(id = "4203" ,
+                                  all_data$Table_1, all_data$Table_2, all_data$Table_3, 
+                                  area_codes = area_codes, area_codes_adp = area_codes_adp)
 
 saveRDS(drug_misuse, file = paste0(data_folder, "Data to be checked/perception_drug_misuse_shiny.rds"))
 write.csv(drug_misuse, file = paste0(data_folder, "Data to be checked/perception_drug_misuse_shiny.csv"),row.names = F)
 
-# 4. Checks ---------------------------------------------------------------
+###############################################.
+## Run function for rowdy behaviour (4115) ------
+###############################################.
+
+rowdy_behaviour <- indicator_cleaning(id = "4115",
+                                      all_data$Table_4, all_data$Table_5, all_data$Table_6, 
+                                      area_codes = area_codes, area_codes_adp = area_codes_adp)
+
+saveRDS(rowdy_behaviour, file = paste0(data_folder, "Data to be checked/perceiving_rowdy_behaviour_shiny.rds"))
+write.csv(rowdy_behaviour, file = paste0(data_folder, "Data to be checked/perceiving_rowdy_behaviour_shiny.csv"),row.names = F)
+
+###############################################.
+## Run function for neighbourhood good place (20903) ----
+###############################################.
+
+good_place <- indicator_cleaning(id = "20903",
+                                 all_data$Table_7, all_data$Table_8, ca_df = all_data$Table_9, 
+                                 area_codes = area_codes, area_codes_adp = area_codes_adp)
+
+saveRDS(good_place, file = paste0(data_folder, "Data to be checked/adults_rating_neighbourhood_very_good_shiny.rds"))
+write.csv(good_place, file = paste0(data_folder, "Data to be checked/adults_rating_neighbourhood_very_good_shiny.csv"),row.names = F)
+
+
+############################################.
+## Checks ----
+############################################.
 
 ## As there is no analyse_second function used for these indicators, run the following to 
 ## check data against last years (may need to change the file names)
 
-# a) Read in last years data
-last_year_rowdy <- read.csv(paste0(data_folder, "Shiny Data/4115 Rowdy behaviour_shiny.csv"))
-last_year_very_good <- read.csv(paste0(data_folder, "Shiny Data/20903_Neighbourhood_rating_shiny.csv"))
+# Read in last years data
+last_year_rowdy_behaviour <- read.csv(paste0(data_folder, "Shiny Data/4115 Rowdy behaviour_shiny.csv"))
+last_year_good_place <- read.csv(paste0(data_folder, "Shiny Data/20903_Neighbourhood_rating_shiny.csv"))
 last_year_drug_misuse <- read.csv(paste0(data_folder, "Shiny Data/4203 Perception drug misuse_shiny.csv"))
 
-# b) function to check totals of shared years
+# Function to check totals of shared years
 
 check_year_totals <- function(last_year_data, this_year_data){
   
   last_year_max <- as.numeric(max(last_year_data$year))
-
+  
   last_year <- last_year_data |> 
     group_by(year) |> 
     summarize(last_year_sum = sum(rate)) |> 
@@ -197,12 +208,11 @@ check_year_totals <- function(last_year_data, this_year_data){
   if(test$test_column[1] == 0) {
     
     print("All totals match")
-   
-
+    
   } else {
     non_match <- filter(both_years,
                         check != 0) 
-      
+    
     return(non_match)
     print("Totals don't match. See non_match dataframe.")
   }
@@ -210,6 +220,6 @@ check_year_totals <- function(last_year_data, this_year_data){
 
 # Check totals
 
-check_year_totals(last_year_data = last_year_rowdy, this_year_data = rowdy)
-check_year_totals(last_year_data = last_year_very_good, this_year_data = very_good)
-check_year_totals(last_year_data = last_year_drug_misuse, this_year_data = drug_misuse)
+check_year_totals(last_year_data = last_year_rowdy_behaviour, this_year_data = rowdy_behaviour)
+check_year_totals(last_year_data = last_year_good_place, this_year_data = good_place)
+check_year_totals(last_year_data = last_year_drug_misuse, this_year_data = drug_misuse)
\ No newline at end of file

From d95ef32e11230da25efd908d7d19ebeded620723 Mon Sep 17 00:00:00 2001
From: apigap01 <abigail.prescott@phs.scot>
Date: Wed, 2 Oct 2024 15:18:29 +0100
Subject: [PATCH 3/4] Single adult dwellings 2023 update

---
 Single Adult Dwellings.R | 73 ++++++++++++++++++++--------------------
 1 file changed, 37 insertions(+), 36 deletions(-)

diff --git a/Single Adult Dwellings.R b/Single Adult Dwellings.R
index bf74cc8..3bc1841 100644
--- a/Single Adult Dwellings.R	
+++ b/Single Adult Dwellings.R	
@@ -16,25 +16,25 @@ source("2.deprivation_analysis.R") # deprivation function
 #Reading data in directly from website that was manually downloaded for previous program
 
 
-col_names_n <- c("datazone", "name", 2006:2022)
-col_names_d <- c("datazone", "name", 2001:2022)
+col_names_n <- c("datazone", "name", 2006:2023)
+col_names_d <- c("datazone", "name", 2001:2023)
 
 #read data in direct from source
 sad_data_extract <- bind_rows(read_csv("https://statistics.gov.scot/slice/observations.csv?&dataset=http%3A%2F%2Fstatistics.gov.scot%2Fdata%2Fhousehold-estimates&http%3A%2F%2Fpurl.org%2Flinked-data%2Fcube%23measureType=http%3A%2F%2Fstatistics.gov.scot%2Fdef%2Fmeasure-properties%2Fcount&http%3A%2F%2Fstatistics.gov.scot%2Fdef%2Fdimension%2Findicator%28dwellings%29=http%3A%2F%2Fstatistics.gov.scot%2Fdef%2Fconcept%2Findicator-dwellings%2Fwith-single-adult-discounts",
-                       skip=8, col_names = col_names_n) %>%  mutate(type = "numerator"),
-                       read_csv("https://statistics.gov.scot/slice/observations.csv?&dataset=http%3A%2F%2Fstatistics.gov.scot%2Fdata%2Fhousehold-estimates&http%3A%2F%2Fpurl.org%2Flinked-data%2Fcube%23measureType=http%3A%2F%2Fstatistics.gov.scot%2Fdef%2Fmeasure-properties%2Fcount&http%3A%2F%2Fstatistics.gov.scot%2Fdef%2Fdimension%2Findicator%28dwellings%29=http%3A%2F%2Fstatistics.gov.scot%2Fdef%2Fconcept%2Findicator-dwellings%2Ftotal-dwellings", 
-                                skip = 8, col_names = col_names_d) %>% mutate(type = "denominator")) #%>% 
-  #janitor::clean_names()
+                                       skip=8, col_names = col_names_n) |>  mutate(type = "numerator"),
+                              read_csv("https://statistics.gov.scot/slice/observations.csv?&dataset=http%3A%2F%2Fstatistics.gov.scot%2Fdata%2Fhousehold-estimates&http%3A%2F%2Fpurl.org%2Flinked-data%2Fcube%23measureType=http%3A%2F%2Fstatistics.gov.scot%2Fdef%2Fmeasure-properties%2Fcount&http%3A%2F%2Fstatistics.gov.scot%2Fdef%2Fdimension%2Findicator%28dwellings%29=http%3A%2F%2Fstatistics.gov.scot%2Fdef%2Fconcept%2Findicator-dwellings%2Ftotal-dwellings", 
+                                       skip = 8, col_names = col_names_d) |> mutate(type = "denominator")) #|> 
+#janitor::clean_names()
 sad_data_extract <-subset(sad_data_extract, select = -c (name, `2006`, `2001`:`2005`))
-sad_data_extract_format <- sad_data_extract %>% 
+sad_data_extract_format <- sad_data_extract |> 
   mutate (datazone=gsub("http://statistics.gov.scot/id/statistical-geography/","", datazone))
 #filter to only datazone level
 sad_data_extract_format <- filter(sad_data_extract_format,substr(datazone,1,3)=="S01")
 #pivot_longer years to one column
-sad_data_extract_pivot <- sad_data_extract_format %>% 
-  pivot_longer(cols = c(`2007`:`2022`), names_to = "year", values_to = "count") %>% 
-#pivot_wider type to two different columns for numerator and denominator
-  pivot_wider(names_from = type, values_from = count) %>% 
+sad_data_extract_pivot <- sad_data_extract_format |> 
+  pivot_longer(cols = c(`2007`:`2023`), names_to = "year", values_to = "count") |> 
+  #pivot_wider type to two different columns for numerator and denominator
+  pivot_wider(names_from = type, values_from = count) |> 
   filter(!is.na(denominator))
 
 
@@ -43,30 +43,31 @@ saveRDS(sad_data_extract_pivot, file=paste0(data_folder, 'Prepared Data/Single_D
 #### Match lookup - datazone with local authority
 
 # dz01 Lookup file for CA 
-dz01_lookup <- readRDS('/conf/linkage/output/lookups/Unicode/Deprivation/DataZone2001_all_simd.rds')%>% 
-  setNames(tolower(names(.))) %>% #variables to lower case
-  select(ca2019, datazone2001)  
-  # #Dealing with changes in ca, hb and hscp codes. Transforms old code versions into 2019 ones
-  #mutate(ca2011 = recode(ca2011, "S12000015"='S12000047', "S12000024"='S12000048',
-     #                  "S12000046"='S12000049', "S12000044"='S12000050'))
+dz01_lookup <- readRDS('/conf/linkage/output/lookups/Unicode/Deprivation/DataZone2001_all_simd.rds')|> 
+  clean_names() |> #variables to lower case
+  select(ca2019, data_zone2001) |> 
+  rename(datazone2001 = data_zone2001)
+# #Dealing with changes in ca, hb and hscp codes. Transforms old code versions into 2019 ones
+#mutate(ca2011 = recode(ca2011, "S12000015"='S12000047', "S12000024"='S12000048',
+#                  "S12000046"='S12000049', "S12000044"='S12000050'))
 
 # \\Isdsf00d03\cl-out\lookups\Unicode\Geography\DataZone2011
 #Preparing file for CA for period 2007 to 2014 (2014 only including dz <= S01006505)
-sad01_data <- sad_data_extract_pivot %>% filter(year<=2014)
+sad01_data <- sad_data_extract_pivot |> filter(year<=2014)
 #Merging with lookup
-sad01_data <- left_join(sad01_data, dz01_lookup, by = c("datazone" = "datazone2001")) %>% 
-  rename(ca = ca2019) %>% filter(datazone<='S01006505') %>% mutate(dz = "dz01")
+sad01_data <- left_join(sad01_data, dz01_lookup, by = c("datazone" = "datazone2001")) |> 
+  rename(ca = ca2019) |> filter(datazone<='S01006505') |> mutate(dz = "dz01")
 
 
-dz11_lookup <- readRDS('/conf/linkage/output/lookups/Unicode/Deprivation/DataZone2011_simd2020v2.rds')%>% 
-  setNames(tolower(names(.))) %>% #variables to lower case
-  select(ca2019, datazone2011)  
+dz11_lookup <- readRDS('/conf/linkage/output/lookups/Unicode/Deprivation/DataZone2011_simd2020v2.rds')|> 
+  clean_names() |> #variables to lower case
+  select(ca2019, datazone2011) #|> 
 
 #Preparing file for CA for period 2014 to 2017 (2014 only including dz > S01006505)
-sad11_data <- sad_data_extract_pivot %>% filter(year>=2014)
+sad11_data <- sad_data_extract_pivot |> filter(year>=2014)
 #Merging with lookup
-sad11_data <- left_join(sad11_data, dz11_lookup, by = c("datazone" = "datazone2011")) %>% 
-  rename(ca = ca2019) %>% filter(datazone>'S01006505') %>% mutate(dz = "dz11")
+sad11_data <- left_join(sad11_data, dz11_lookup, by = c("datazone" = "datazone2011")) |> 
+  rename(ca = ca2019) |> filter(datazone>'S01006505') |> mutate(dz = "dz11")
 
 # Merge dz01 & dz11 data into single file (Basefile)
 sad_data_raw <- full_join(sad01_data, sad11_data)
@@ -77,20 +78,20 @@ sad_data_raw <- full_join(sad01_data, sad11_data)
 
 #### Prepare / Aggregate for specified geographies - LA, DZ11 & base IRs
 # Prepare / Aggregate by la
-sadla_data_raw <- sad_data_raw %>%
-  group_by(ca, year, dz) %>% 
-  summarise_at(c("numerator", "denominator"), sum, na.rm =T) %>% 
-  filter(dz != "dz01" | year != "2014") %>% ungroup()
+sadla_data_raw <- sad_data_raw |>
+  group_by(ca, year, dz) |> 
+  summarise_at(c("numerator", "denominator"), sum, na.rm =T) |> 
+  filter(dz != "dz01" | year != "2014") |> ungroup()
 
 sadla_data_raw <- select(sadla_data_raw,-c(dz))
 
 saveRDS(sadla_data_raw, file=paste0(data_folder, 'Prepared Data/Single_Dwellings_LA_raw.rds'))
 
 # Prepare / Aggregate by dz11
-sad11_data_raw <- sad_data_raw %>%
-  group_by(datazone, year, dz) %>% 
-  summarise_at(c("numerator", "denominator"), list(sum), na.rm =T) %>% 
-  filter(dz == "dz11") %>% ungroup()
+sad11_data_raw <- sad_data_raw |>
+  group_by(datazone, year, dz) |> 
+  summarise_at(c("numerator", "denominator"), list(sum), na.rm =T) |> 
+  filter(dz == "dz11") |> ungroup()
 
 sad11_data_raw <- select(sad11_data_raw,-c(dz))
 
@@ -105,7 +106,7 @@ analyze_first(filename = "Single_Dwellings_LA", geography = "council", measure =
               yearstart = 2007, yearend = 2013, time_agg = 1)
 
 analyze_first(filename = "Single_Dwellings_dz11", geography = "datazone11", measure = "percent", 
-              yearstart = 2014, yearend = 2022, time_agg = 1)
+              yearstart = 2014, yearend = 2023, time_agg = 1)
 
 # Merging CA, DZ11 together and save both periods together
 all_data <- rbind(readRDS(paste0(data_folder, "Temporary/Single_Dwellings_LA_formatted.rds")),
@@ -118,7 +119,7 @@ analyze_second(filename = "Single_Dwellings_all", measure = "percent", time_agg
 
 #Deprivation analysis function
 analyze_deprivation(filename="Single_Dwellings_depr", measure="percent", time_agg=1, 
-                    yearstart= 2007, yearend=2022,   year_type = "calendar", 
+                    yearstart= 2007, yearend=2023,   year_type = "calendar", 
                     ind_id = 20504)
 
 ##END

From 98c65863110c19ea63869c51b519b4b9ef3c00f8 Mon Sep 17 00:00:00 2001
From: Monica McGibbon <monica.mcgibbon@phs.scot>
Date: Tue, 8 Oct 2024 15:43:27 +0100
Subject: [PATCH 4/4] re-run deprivation function

---
 Single Adult Dwellings.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Single Adult Dwellings.R b/Single Adult Dwellings.R
index 3bc1841..c5e9038 100644
--- a/Single Adult Dwellings.R	
+++ b/Single Adult Dwellings.R	
@@ -119,7 +119,7 @@ analyze_second(filename = "Single_Dwellings_all", measure = "percent", time_agg
 
 #Deprivation analysis function
 analyze_deprivation(filename="Single_Dwellings_depr", measure="percent", time_agg=1, 
-                    yearstart= 2007, yearend=2023,   year_type = "calendar", 
+                    yearstart= 2007, yearend=2022,   year_type = "calendar", 
                     ind_id = 20504)
 
 ##END