-
Notifications
You must be signed in to change notification settings - Fork 1
/
Access deprived.R
executable file
·164 lines (130 loc) · 7.89 KB
/
Access deprived.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# ScotPHO indicators: people living in 15% most access deprived areas
# Part 1 - Create population files
# Part 2 - Create access rank data and save basefiles
# Part 3 - Calling the analysis functions
###############################################.
## Packages/Filepaths/Functions ----
###############################################.
# The deprivation lookups folder is reached through a different root depending
# on the platform string reported by the session (server vs. anything else).
cl_out_depr <- if (sessionInfo()$platform %in%
                   c("x86_64-redhat-linux-gnu (64-bit)", "x86_64-pc-linux-gnu (64-bit)")) {
  "/conf/linkage/output/lookups/Unicode/Deprivation/"
} else {
  "//stats/linkage/output/lookups/Unicode/Deprivation/"
}
source("1.indicator_analysis.R") #Normal indicator functions
source("2.deprivation_analysis.R") # deprivation function
# Small function to standardise the SIMD information of one year.
# It reads <cl_out_depr><data>.rds, lower-cases the column names, keeps the
# rank variable and the datazone code, and adds the year.
# Function parameters:
#   data     - name (without the .rds extension) of the basefile to read. Its
#              first 12 characters, lower-cased, are used as the name of the
#              datazone column (e.g. "datazone2001").
#   simd     - name, as a string, of the simd rank variable to keep.
#   year     - year the extract stands for; stored in the `year` column.
#   list_pos - position in the list `data_access` where the result is placed.
# Side effect: writes the result into `data_access[[list_pos]]`, which must
# already exist in an enclosing environment. The data frame (columns rank,
# datazone, year) is also returned invisibly.
read_simd <- function(data, simd, year, list_pos) {
  dz_column <- tolower(substr(data, 1, 12))

  data_simd <- readRDS(paste0(cl_out_depr, data, ".rds")) %>%
    setNames(tolower(names(.))) %>% # variables to lower case
    # all_of() makes it explicit that simd/dz_column hold column *names*;
    # a bare external vector is ambiguous if a column shares the variable name
    select(rank = all_of(simd), datazone = all_of(dz_column)) %>%
    mutate(year = year)

  # The callers collect the yearly extracts through this shared list
  data_access[[list_pos]] <<- data_simd
  invisible(data_simd)
}
###############################################.
## Part 1 - Create population files ----
###############################################.
# Creating base populations using dz2001 before 2014 and dz2011 onwards.
# This is better to be run in R server.
dz01_base <- readRDS(paste0(data_folder, "Lookups/Population/DZ01_pop_basefile.rds")) %>%
  filter(year < 2014) %>% # 2014 uses simd2016 based on dz2011
  rename(datazone = datazone2001)

dz11_base <- readRDS(paste0(data_folder, "Lookups/Population/DZ11_pop_basefile.rds")) %>%
  filter(year > 2013) %>% # 2014 onwards uses simd based on dz2011
  rename(datazone = datazone2011)

dz_pop_base <- rbind(dz01_base, dz11_base)
rm(dz01_base, dz11_base)

# Population that represents 15% of Scotland, one row per year.
# Each denominator is scaled by 3/20 (= 15%) before being added up.
scot_pop_base <- dz_pop_base %>%
  group_by(year) %>%
  summarise(pop_15 = sum(denominator / 20 * 3)) %>%
  ungroup()

# Total population of each datazone in each year.
# ungroup() so the result does not stay grouped by year after summarise().
dz_pop_base <- dz_pop_base %>%
  group_by(year, datazone) %>%
  summarise(pop = sum(denominator)) %>%
  ungroup()
###############################################.
## Part 2 - Create access rank data ----
###############################################.
data_access <- list() # empty list, filled in place by read_simd()

# One entry per SIMD release: the basefile that holds it, the access rank
# variable to read and the years that release is applied to.
simd_versions <- list(
  list(data = "DataZone2001_all_simd", simd = "simd2004_access_rank", years = 2002:2003),
  list(data = "DataZone2001_all_simd", simd = "simd2006_access_rank", years = 2004:2006),
  list(data = "DataZone2001_all_simd", simd = "simd2009v2_access_rank", years = 2007:2009),
  list(data = "DataZone2001_all_simd", simd = "simd2012_access_rank", years = 2010:2013),
  list(data = "DataZone2011_simd2016", simd = "simd2016_access_rank", years = 2014:2016),
  list(data = "DataZone2011_simd2020v2", simd = "simd2020v2_access_rank", years = 2017:2019)
)

# Expanding to one row per year, so list positions follow from the row number
# instead of being typed by hand for every release.
simd_spec <- do.call("rbind", lapply(simd_versions, function(version) {
  data.frame(data = version$data, simd = version$simd, year = version$years,
             stringsAsFactors = FALSE)
}))

# read_simd() creates the dataset with the rank for each datazone and assigns
# it to the list. Only that side effect is wanted here, so the mapply() result
# is neither simplified nor printed.
invisible(mapply(read_simd, data = simd_spec$data, simd = simd_spec$simd,
                 year = simd_spec$year, list_pos = seq_len(nrow(simd_spec)),
                 SIMPLIFY = FALSE))
rm(simd_versions, simd_spec)

data_access <- do.call("rbind", data_access) # converting from list into dataframe
# Joining with both of the populations: dz and scotland 15%
data_access <- data_access %>%
  left_join(dz_pop_base, by = c("datazone", "year")) %>%
  left_join(scot_pop_base, by = "year")
rm(dz_pop_base)

# A datazone without a population record has pop = NA after the join. Left as
# is, that NA would propagate through cumsum() and silently give a numerator
# of 0 to every datazone ranked after it in that year.
if (anyNA(data_access[["pop"]])) {
  warning(sum(is.na(data_access[["pop"]])),
          " datazone/year rows have no population record; treated as 0",
          call. = FALSE)
}

# Creating cumulative populations for each year based on access rank
data_access <- data_access %>%
  mutate_if(is.integer, as.numeric) %>% # case_when() needs matching types
  mutate(pop = coalesce(pop, 0)) %>%
  arrange(rank) %>%
  group_by(year) %>%
  mutate(cum_pop = cumsum(pop)) %>%
  ungroup() %>%
  # If the datazone is included in the 15% more access deprived then use its
  # population as numerator, if not consider 0
  mutate(numerator = case_when((pop_15 - cum_pop) >= 0 ~ pop,
                               TRUE ~ 0)) %>%
  select(datazone, year, numerator)
# Datazone-level file with every year, used by the deprivation analysis
saveRDS(data_access,
        file = paste0(data_folder, "Prepared Data/access_deprived_depr_raw.rds"))

# Same data restricted to 2014 onwards, saved as the DZ11 file
data_access_dz11 <- filter(data_access, year > 2013)
saveRDS(data_access_dz11,
        file = paste0(data_folder, "Prepared Data/access_deprived_dz11_raw.rds"))
# Preparing file for CA for the years before 2014
data_access_dz01 <- data_access %>% filter(year < 2014)

# Lookup file for CA: keeps only the council area and datazone codes
ca_lookup <- read_xlsx(paste0(data_folder, "Lookups/Geography/DataZone2001.xlsx")) %>%
  setNames(tolower(names(.))) %>%
  select(ca, datazone)

# Merging with lookup and aggregating by ca.
# The join key is spelled out: datazone is the only column both tables share.
data_access_dz01 <- left_join(data_access_dz01, ca_lookup, by = "datazone") %>%
  group_by(ca, year) %>%
  summarise(numerator = sum(numerator, na.rm = TRUE)) %>%
  ungroup() %>%
  # Dealing with changes in ca codes. Transforms old code versions into 2019 ones
  mutate(ca = recode(ca, "S12000015" = "S12000047", "S12000024" = "S12000048",
                     "S12000046" = "S12000049", "S12000044" = "S12000050"))

saveRDS(data_access_dz01, file = paste0(data_folder, "Prepared Data/access_deprived_ca_raw.rds"))
###############################################.
## Part 3 - Calling the analysis functions ----
###############################################.
# Normal indicator analysis, first for CA (2002-2013) and then DZ11 (2014-2019)
analyze_first(filename = "access_deprived_ca", geography = "council",
              measure = "percent", hscp = TRUE,
              yearstart = 2002, yearend = 2013, time_agg = 1, pop = "CA_pop_allages")

analyze_first(filename = "access_deprived_dz11", geography = "datazone11",
              measure = "percent",
              yearstart = 2014, yearend = 2019, time_agg = 1, pop = "DZ11_pop_allages")

# Merging CA and DZ11 together: the two formatted files are stacked and saved
# under the name that analyze_second() is then called with
all_data <- rbind(readRDS(paste0(data_folder, "Temporary/access_deprived_dz11_formatted.rds")),
                  readRDS(paste0(data_folder, "Temporary/access_deprived_ca_formatted.rds")))
saveRDS(all_data, file = paste0(data_folder, "Temporary/access_deprived_all_formatted.rds"))

# Calling second analysis function
analyze_second(filename = "access_deprived_all", measure = "percent",
               time_agg = 1, ind_id = 20902, year_type = "calendar")
# Years kept in the final files, defined once so both filters below agree.
# NOTE(review): these match the first year assigned to each SIMD release
# earlier in this script - confirm that is the intended rule.
access_plot_years <- c(2002, 2004, 2007, 2010, 2014, 2017)
# Folder where the files to be checked are written
access_check_folder <- paste0("/PHI_conf/ScotPHO/Profiles/Data/", "Data to be checked/")

###### Save final result before it is overwritten by analyze_deprivation()
###### and filter correct years to include in Plot
data_shiny_filtered <- final_result %>%
  select(code, ind_id, year, numerator, rate, lowci, upci, def_period, trend_axis) %>%
  filter(year %in% access_plot_years) %>%
  arrange(code, year, trend_axis)

###############################################.
# Deprivation analysis function
analyze_deprivation(filename = "access_deprived_depr", measure = "percent", time_agg = 1,
                    yearstart = 2002, yearend = 2019,
                    year_type = "calendar", pop = "depr_pop_allages", ind_id = 20902)

####### Filter deprivation data to include correct years
data_shiny_deprivation_filtered <- final_result %>%
  filter(year %in% access_plot_years) %>%
  arrange(code, year, trend_axis)

# Save to Data to be checked folder
saveRDS(data_shiny_filtered,
        file = paste0(access_check_folder, "access_deprived_all", "_shiny.rds"))
write_csv(data_shiny_filtered,
          file = paste0(access_check_folder, "access_deprived_all", "_shiny.csv"))
saveRDS(data_shiny_deprivation_filtered,
        file = paste0(access_check_folder, "access_deprived_depr_ineq.rds"))
## END