-
Notifications
You must be signed in to change notification settings - Fork 1
/
Smoking attributable deaths.R
424 lines (375 loc) · 27 KB
/
Smoking attributable deaths.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
# ScotPHO indicator: Smoking attributable deaths
# Notes for analyst:
# Smoking attributable admissions/deaths are estimates only - it is calculated by following methodology laid out by PHE.
# Put simply calculation involves multiplying certain types of admissions/deaths by the attributable fractions that are
# associated with smoking and then also factoring percentage of the population that are current or ex smokers.
# In June 2023 the indicator switched from using SHoS(household survey) data as the source of smoking prevalence to using SHeS (health survey) data instead.
# Around 2020 the SHoS survey stopped asking questions on ex-smoking status - which is essential for the attributable calculation.
# The switch was also made on the recommendation of SG SHoS team as SHeS is deemed best source of health data
# SHeS was not previously used as the sample size did not allow for robust estimates at LA level.
# Since sample size of SHeS is small and local authority level data not robust we need to use aggregated years to provide smoking status for areas - this isn't ideal as scotpho indicator is a rolling average
# but smoking attributable admissions/deaths are artificial construct and using best available data should be acceptable.
# As of June 2023 2 teams in PHS produce smoking attributable figures for ScotPHO - the tobacco team (led by Scot Kilgariff, who host their estimates on scotpho website under tobacco data pages) and ScotPHO team (who produce indicator data for scotpho profiles tool).
# The estimates produced by the two teams serve different purposes and data is generated using different scripts - the outputs are
# therefore slightly different but figures should not be drastically different. Once indicator data has been generated the Scotland totals can be compared to output published https://www.scotpho.org.uk/risk-factors/tobacco-use/data/smoking-attributable-deaths/
# Although both teams now use SHeS as source of smoking prevalence our scotland totals will differ as ScotPHO indicator is a 2 year rolling figure (tobacco team produce scotland only data for individual years).
# scotpho estimates that include data prior to 2019 will still be based on SHoS (when we switched we did not back calculate historic data)
# Part 1 - Compile smoking prevalence data
# Part 2 - Extract data from SMRA
# Part 3 - add in relative risks of each disease as a result of smoking
# Part 4 - Aggregating geographic areas
# Part 5 - Calculate smoking attributable fraction
# Part 6 - Run analysis functions
###############################################.
## Packages/Filepaths/Functions ----
###############################################.
source("1.indicator_analysis.R") #Normal indicator functions
###############################################.
# Part 1 - Compile smoking prevalence data ----
# Requires 2 data series both are trends in percentage of ex & current smokers in those aged 35 year plus, by year, by sex
# Series 1 : prevalence across all age groups but split by ca/nhs board with overall scotland values
# Series 2 : Scotland level prevalence but split by age groups
###############################################.
# Historically there are 2 potential sources of smoking prevalence data, both national surveys. Differences in sample size and available data meant that the scotpho indicator
# needed to switch from using SHoS (Scottish Household survey) to SHeS (Scottish Health survey) as the source of smoking prevalence data for generating the smoking attributable deaths/admissions indicators.
###############################################.
# Prevalence data series 1: AREA PREVALENCE ----
###############################################.
# SHoS area-level prevalence (covers 2012-2018).
# Reads the "Area prev" sheet, lower-cases the column names, tags the rows with
# their survey source and converts area names to geography codes:
# council areas (type "ca") first, then health boards and Scotland (type "hb").
# Names without an entry in the lookups are passed through unchanged by recode();
# rows of any other type keep a missing code.
smok_prev_area_shos <- read_excel(
  paste0(data_folder, "Received Data/Smoking Attributable/SHOS_smoking_prevalence_formatted (DO NOT DELETE old data before move to shes).xlsx"),
  sheet = "Area prev"
) %>%
  mutate(code = NA) %>% # placeholder column, filled in below
  setNames(tolower(names(.))) %>% # variables to lower case
  mutate(
    period = as.character(year),
    source = "SHoS"
  ) %>%
  mutate(code = case_when(
    type == "ca" ~ recode(
      area,
      "Aberdeen City" = "S12000033",
      "Aberdeenshire" = "S12000034",
      "Angus" = "S12000041",
      "Argyll & Bute" = "S12000035",
      "Clackmannanshire" = "S12000005",
      "Dumfries & Galloway" = "S12000006",
      "Dundee City" = "S12000042",
      "East Ayrshire" = "S12000008",
      "East Dunbartonshire" = "S12000045",
      "East Lothian" = "S12000010",
      "East Renfrewshire" = "S12000011",
      "Edinburgh, City of" = "S12000036",
      "Na h-Eileanan Siar" = "S12000013",
      "Falkirk" = "S12000014",
      "Fife" = "S12000047",
      "Glasgow City" = "S12000049",
      "Highland" = "S12000017",
      "Inverclyde" = "S12000018",
      "Midlothian" = "S12000019",
      "Moray" = "S12000020",
      "North Ayrshire" = "S12000021",
      "North Lanarkshire" = "S12000050",
      "Orkney Islands" = "S12000023",
      "Perth & Kinross" = "S12000048",
      "Renfrewshire" = "S12000038",
      "Scottish Borders" = "S12000026",
      "Shetland Islands" = "S12000027",
      "South Ayrshire" = "S12000028",
      "South Lanarkshire" = "S12000029",
      "Stirling" = "S12000030",
      "West Dunbartonshire" = "S12000039",
      "West Lothian" = "S12000040"
    ),
    type == "hb" ~ recode(
      area,
      "Ayrshire & Arran" = "S08000015",
      "Borders" = "S08000016",
      "Dumfries & Galloway" = "S08000017",
      "Fife" = "S08000029",
      "Forth Valley" = "S08000019",
      "Grampian" = "S08000020",
      "Greater Glasgow & Clyde" = "S08000031",
      "Highland" = "S08000022",
      "Lanarkshire" = "S08000032",
      "Lothian" = "S08000024",
      "Orkney" = "S08000025",
      "Shetland" = "S08000026",
      "Tayside" = "S08000030",
      "Western Isles" = "S08000028",
      "Scotland" = "S00000001"
    )
  ))
# SHeS area-level prevalence (2019 onward).
# Steps, in order:
#   1. read the "area_prev_shes" sheet and lower-case the column names
#   2. drop frequency / confidence interval columns
#   3. shorten the smoking category labels and spread them into one column each
#   4. drop the "all" sex group (sex 3) and the never-smoked column
#   5. convert area names to geography codes (council areas have type "ca";
#      health boards and Scotland have type "hb"). Names without a lookup entry
#      are passed through unchanged by recode().
smok_prev_area_shes <- read_excel(
  paste0(data_folder, "Received Data/Smoking Attributable/shes smoking prevalence_for scottish areas.xlsx"),
  sheet = "area_prev_shes"
) %>%
  mutate(code = NA) %>% # placeholder column, filled in at the end
  setNames(tolower(names(.))) %>% # variables to lower case
  select(-frequency, -lci, -uci) %>%
  mutate(smoking_categories = case_when(
    smoking_categories == "Never smoked/Used to smoke occasionally" ~ "never",
    smoking_categories == "Used to smoke regularly" ~ "ex_area",
    smoking_categories == "Current smoker" ~ "current_area",
    TRUE ~ "other"
  )) %>%
  pivot_wider(
    names_from = "smoking_categories",
    values_from = "percent"
  ) %>%
  filter(sex != 3) %>% # exclude sex 3 (all)
  select(-never) %>%
  mutate(code = case_when(
    type == "ca" ~ recode(
      area,
      "Aberdeen City" = "S12000033",
      "Aberdeenshire" = "S12000034",
      "Angus" = "S12000041",
      "Argyll and Bute" = "S12000035",
      "Clackmannanshire" = "S12000005",
      "Dumfries and Galloway" = "S12000006",
      "Dundee City" = "S12000042",
      "East Ayrshire" = "S12000008",
      "East Dunbartonshire" = "S12000045",
      "East Lothian" = "S12000010",
      "East Renfrewshire" = "S12000011",
      "Edinburgh City" = "S12000036",
      "Na h-Eileanan Siar" = "S12000013",
      "Falkirk" = "S12000014",
      "Fife" = "S12000047",
      "Glasgow City" = "S12000049",
      "Highland" = "S12000017",
      "Inverclyde" = "S12000018",
      "Midlothian" = "S12000019",
      "Moray" = "S12000020",
      "North Ayrshire" = "S12000021",
      "North Lanarkshire" = "S12000050",
      "Orkney Islands" = "S12000023",
      "Perth and Kinross" = "S12000048",
      "Renfrewshire" = "S12000038",
      "Scottish Borders" = "S12000026",
      "Shetland Islands" = "S12000027",
      "South Ayrshire" = "S12000028",
      "South Lanarkshire" = "S12000029",
      "Stirling" = "S12000030",
      "West Dunbartonshire" = "S12000039",
      "West Lothian" = "S12000040"
    ),
    type == "hb" ~ recode(
      area,
      "Ayrshire and Arran" = "S08000015",
      "Borders" = "S08000016",
      "Dumfries and Galloway" = "S08000017",
      "Fife" = "S08000029",
      "Forth Valley" = "S08000019",
      "Grampian" = "S08000020",
      "Greater Glasgow and Clyde" = "S08000031",
      "Highland" = "S08000022",
      "Lanarkshire" = "S08000032",
      "Lothian" = "S08000024",
      "Orkney" = "S08000025",
      "Shetland" = "S08000026",
      "Tayside" = "S08000030",
      "Western Isles" = "S08000028",
      "Scotland" = "S00000001"
    )
  ))
# Scotland-level current / ex smoker prevalence, attached to every SHeS row as
# scot_current / scot_ex (used in the attributable fraction calculation later).
scot_prev_shes <- smok_prev_area_shes %>%
  filter(code == "S00000001") %>%
  select(-area, -type, -source, -code) %>%
  rename(scot_current = current_area, scot_ex = ex_area)

smok_prev_area_shes <- smok_prev_area_shes %>%
  left_join(scot_prev_shes, by = c("sex", "period", "year"))

# No 2020 prevalence is available (pandemic disrupted survey collection), and
# the indicator is a 2 year rolling average that copes badly with a missing
# year - so the 2019 rows are re-used as 2020, flagged in the source column.
area_prev_2020_fill <- smok_prev_area_shes %>%
  filter(year == 2019) %>%
  mutate(
    source = "shes duplicate2019",
    year = 2020
  )

# Stack SHeS, SHoS and the 2020 fill-in rows; sex is carried forward as the
# character column sex_grp.
smok_prev_area <- rbind(smok_prev_area_shes, smok_prev_area_shos, area_prev_2020_fill) %>%
  mutate(sex_grp = as.character(sex)) %>%
  select(-sex) %>%
  arrange(code, year, sex_grp)

# remove data frames that are no longer needed
rm(smok_prev_area_shes, smok_prev_area_shos, scot_prev_shes, area_prev_2020_fill)
###############################################.
## Prevalence data series 2: AGE PREVALENCE ----
###############################################.
# SHoS prevalence by age band (2012-2018).
# Age bands are converted to the numeric age_grp2 codes used for merging later:
# '35-54' = 2, '55-64' = 3, '65-74' = 4, '75+' = 5 (anything else becomes NA).
smok_prev_age_shos <- read_excel(
  paste0(data_folder, "Received Data/Smoking Attributable/SHOS_smoking_prevalence_formatted (DO NOT DELETE old data before move to shes).xlsx"),
  sheet = "Age prev"
) %>%
  rename(sex_grp = sex) %>%
  setNames(tolower(names(.))) %>% # variables to lower case
  mutate(
    age_grp2 = match(agegrp, c("35-54", "55-64", "65-74", "75+")) + 1,
    sex_grp = as.character(sex_grp), # to allow merging in next section
    source = "SHoS"
  ) %>%
  select(-agegrp, -code)

# SHeS prevalence by age band (2019 onwards): shorten the smoking category
# labels, spread them into columns, drop the "all" sex group and apply the
# same age band coding as above.
smok_prev_age_shes <- read_excel(
  paste0(data_folder, "Received Data/Smoking Attributable/shes smoking prevalence_for agegroups.xlsx"),
  sheet = "age_prev_shes"
) %>%
  setNames(tolower(names(.))) %>%
  select(-frequency) %>%
  mutate(smoking_category = case_when(
    smoking_category == "Never smoked/Used to smoke occasionally" ~ "never",
    smoking_category == "Used to smoke regularly" ~ "ex_age",
    smoking_category == "Current smoker" ~ "current_age",
    TRUE ~ "other"
  )) %>%
  pivot_wider(
    names_from = "smoking_category",
    values_from = "percent"
  ) %>%
  filter(sex_grp != 3) %>% # exclude sex 3 (all)
  mutate(
    age_grp2 = match(agegrp, c("35-54", "55-64", "65-74", "75+")) + 1,
    sex_grp = as.character(sex_grp) # to allow merging in next section
  ) %>%
  select(-agegrp, -code)

# No 2020 prevalence is available (pandemic disrupted survey collection), so
# the 2019 rows are re-used as 2020 and flagged in the source column.
age_prev_2020_fill <- smok_prev_age_shes %>%
  filter(year == 2019) %>%
  mutate(
    source = "shes duplicate2019",
    year = 2020
  )

# Stack SHeS, SHoS and the 2020 fill-in rows of the age prevalence series
smok_prev_age <- rbind(smok_prev_age_shes, smok_prev_age_shos, age_prev_2020_fill) %>%
  arrange(year, sex_grp)

# remove data frames that are no longer needed
rm(smok_prev_age_shes, smok_prev_age_shos, age_prev_2020_fill)
###############################################.
## Part 2 - Extract data from SMRA ----
###############################################.
# SMRA login information (credentials are requested through RStudio prompts)
channel <- suppressWarnings(dbConnect(odbc(), dsn = "SMRA",
                                      uid = .rs.askForPassword("SMRA Username:"),
                                      pwd = .rs.askForPassword("SMRA Password:")))

# Extracting deaths from Scottish residents 35 and over with a smoking
# attributable diagnosis code within the main diagnosis field.
# Only the first 3 characters of the underlying cause of death are kept (diag).
# Some codes used in the admissions analysis are not used here as considered non lethal
# e.g. hip fracture
# Note: registrations up to the end of 2022 are extracted here, but Part 5 of
# this script keeps only years before 2022.
smoking_deaths <- as_tibble(dbGetQuery(channel, statement =
"SELECT year_of_registration year, substr(UNDERLYING_CAUSE_OF_DEATH,1,3) diag, age,
sex sex_grp, postcode pc7
FROM ANALYSIS.GRO_DEATHS_C
WHERE date_of_registration between '1 January 2012' and '31 December 2022'
AND sex <> 9
AND age > 34
AND country_of_residence='XS'
AND council_area is not null
AND regexp_like(UNDERLYING_CAUSE_OF_DEATH, 'C3[34]|C0|C1[0-6]|C25|C32|C53|C6[4-8]|C80|C92|J4[0-4]|J1[0-8]|I0|I[234]|I5[01]|I6|I7[0-8]|K2[567]')")) %>%
  setNames(tolower(names(.))) %>% # variables to lower case
  create_agegroups() # Creating age groups for standardization.

# The extract is the only use of the SMRA connection in this script, so close
# it now rather than leaving the database session open.
dbDisconnect(channel)

# Bringing LA and datazone info.
postcode_lookup <- readRDS('/conf/linkage/output/lookups/Unicode/Geography/Scottish Postcode Directory/Scottish_Postcode_Directory_2023_1.rds') %>%
  setNames(tolower(names(.))) %>% # variables to lower case
  select(pc7, ca2019, hb2019)

# Attach council area and health board codes by postcode; postcodes missing
# from the lookup get NA geography codes (left join).
smoking_deaths <- left_join(smoking_deaths, postcode_lookup, by = "pc7") %>%
  mutate(scotland = "S00000001") # creating variable for Scotland
###############################################.
## Part 3 - add in relative risks of each disease as a result of smoking ----
###############################################.
# Taken from Public Health England profiles
# Adds two columns to smoking_deaths:
#   current - relative risk of death for current smokers
#   ex      - relative risk of death for ex-smokers
# Each value is chosen by sex_grp, the 3-character cause of death code (diag)
# and, for some conditions, age_grp. case_when() uses the first rule that matches.
# %<>% pipes smoking_deaths through the chain and assigns the result back to it.
# age_grp codes: later in this script codes 8-11 are mapped to the 35-54
# prevalence band, 12-13 to 55-64, 14-15 to 65-74 and 16 or above to 75+.
# A relative risk of 1 means no excess risk: that record adds nothing to the
# smoking attributable fraction calculated in Part 5.
smoking_deaths %<>%
mutate(current = case_when( #Current smokers risk
sex_grp == 1 & diag >= "C00" & diag <= "C14" ~ 10.89, #Upper respiratory sites cancers
sex_grp == 2 & diag >= "C00" & diag <= "C14" ~ 5.08,
sex_grp == 1 & diag == "C15" ~ 6.76, #Oesophagus cancers
sex_grp == 2 & diag == "C15" ~ 7.75,
sex_grp == 1 & diag == "C16" ~ 1.96, #Stomach cancers
sex_grp == 2 & diag == "C16" ~ 1.36,
sex_grp == 1 & diag == "C25" ~ 2.31, #Pancreas cancers
sex_grp == 2 & diag == "C25" ~ 2.25,
sex_grp == 1 & diag == "C32" ~ 14.60, #Larynx cancers
sex_grp == 2 & diag == "C32" ~ 13.02,
sex_grp == 1 & diag %in% c("C33", "C34") ~ 23.26, #Trachea, lung, bronchus cancers
sex_grp == 2 & diag %in% c("C33", "C34") ~ 12.69,
sex_grp == 1 & diag == "C53" ~ 1, #Cervical cancers
sex_grp == 2 & diag == "C53" ~ 1.59,
sex_grp == 1 & diag %in% c("C64", "C65", "C66", "C68") ~ 2.50, #Kidney, renal pelvis cancers
sex_grp == 2 & diag %in% c("C64", "C65", "C66", "C68") ~ 1.40,
sex_grp == 1 & diag == "C67" ~ 3.27, #Bladder cancers
sex_grp == 2 & diag == "C67" ~ 2.22,
sex_grp == 1 & diag == "C80" ~ 4.40, #Unspecified site cancers
sex_grp == 2 & diag == "C80" ~ 2.20,
sex_grp == 1 & diag == "C92" ~ 1.80, #Myeloid leukaemia
sex_grp == 2 & diag == "C92" ~ 1.20,
# Age-specific risks: one rule per sex and age band
sex_grp == 1 & diag >= "I20" & diag <= "I25" & age_grp <= 11 ~ 4.20, #Ischaemic heart disease
sex_grp == 1 & diag >= "I20" & diag <= "I25" & age_grp %in% c(12, 13) ~ 2.50,
sex_grp == 1 & diag >= "I20" & diag <= "I25" & age_grp %in% c(14, 15) ~ 1.80,
sex_grp == 1 & diag >= "I20" & diag <= "I25" & age_grp >= 16 ~ 1.40,
sex_grp == 2 & diag >= "I20" & diag <= "I25" & age_grp <= 11 ~ 5.30,
sex_grp == 2 & diag >= "I20" & diag <= "I25" & age_grp %in% c(12, 13) ~ 2.80,
sex_grp == 2 & diag >= "I20" & diag <= "I25" & age_grp %in% c(14, 15) ~ 2.10,
sex_grp == 2 & diag >= "I20" & diag <= "I25" & age_grp >= 16 ~ 1.40,
sex_grp == 1 & ((diag >= "I00" & diag <= "I09") | #Other heart disease
(diag >= "I26" & diag <= "I51")) ~ 1.78,
sex_grp == 2 & ((diag >= "I00" & diag <= "I09") |
(diag >= "I26" & diag <= "I51")) ~ 1.49,
sex_grp == 1 & diag >= "I60" & diag <= "I69" & age_grp <= 11 ~ 4.40, #Cerebrovascular disease
sex_grp == 1 & diag >= "I60" & diag <= "I69" & age_grp %in% c(12, 13) ~ 3.10,
sex_grp == 1 & diag >= "I60" & diag <= "I69" & age_grp %in% c(14, 15) ~ 2.20,
sex_grp == 1 & diag >= "I60" & diag <= "I69" & age_grp >= 16 ~ 1.60,
sex_grp == 2 & diag >= "I60" & diag <= "I69" & age_grp <= 11 ~ 5.40,
sex_grp == 2 & diag >= "I60" & diag <= "I69" & age_grp %in% c(12, 13) ~ 3.70,
sex_grp == 2 & diag >= "I60" & diag <= "I69" & age_grp %in% c(14, 15) ~ 2.60,
sex_grp == 2 & diag >= "I60" & diag <= "I69" & age_grp >= 16 ~ 1.30,
sex_grp == 1 & diag == "I70" ~ 2.44, #Atherosclerosis.
sex_grp == 2 & diag == "I70" ~ 1.83,
sex_grp == 1 & diag == "I71" ~ 6.21, #Aortic aneurysm
sex_grp == 2 & diag == "I71" ~ 7.07,
sex_grp == 1 & diag >= "I72" & diag <= "I78" ~ 2.07, #Other arterial disease
sex_grp == 2 & diag >= "I72" & diag <= "I78" ~ 2.17,
sex_grp == 1 & diag >= "J10" & diag <= "J18" & age_grp <=13 ~ 2.50, #Pneumonia, influenza
sex_grp == 1 & diag >= "J10" & diag <= "J18" & age_grp >= 14 ~ 2.00,
sex_grp == 2 & diag >= "J10" & diag <= "J18" & age_grp <=13 ~ 4.30,
sex_grp == 2 & diag >= "J10" & diag <= "J18" & age_grp >= 14 ~ 2.20,
sex_grp == 1 & diag %in% c("J40", "J41", "J42", "J43") ~ 17.10, #Chronic obstructive lung disease
sex_grp == 2 & diag %in% c("J40", "J41", "J42", "J43") ~ 12.04,
sex_grp == 1 & diag == "J44" ~ 10.58, #Chronic airway obstruction
sex_grp == 2 & diag == "J44" ~ 13.08,
sex_grp == 1 & diag %in% c("K25", "K26", "K27") ~ 5.40, #Stomach / duodenal ulcer
sex_grp == 2 & diag %in% c("K25", "K26", "K27") ~ 5.50,
# Records matching none of the rules above get 0; Part 4 drops them with filter(current > 0)
TRUE ~ 0
)) %>%
mutate(ex = case_when( #Ex-smokers risk
sex_grp == 1 & diag >= "C00" & diag <= "C14" ~ 3.40, #Upper respiratory sites cancers
sex_grp == 2 & diag >= "C00" & diag <= "C14" ~ 2.29,
sex_grp == 1 & diag == "C15" ~ 4.46, #Oesophagus cancers
sex_grp == 2 & diag == "C15" ~ 2.79,
sex_grp == 1 & diag == "C16" ~ 1.47, #Stomach cancers
sex_grp == 2 & diag == "C16" ~ 1.32,
sex_grp == 1 & diag == "C25" ~ 1.15, #Pancreas cancers
sex_grp == 2 & diag == "C25" ~ 1.55,
sex_grp == 1 & diag == "C32" ~ 6.34, #Larynx cancers
sex_grp == 2 & diag == "C32" ~ 5.16,
sex_grp == 1 & diag %in% c("C33", "C34") ~ 8.70, #Trachea, lung, bronchus cancers
sex_grp == 2 & diag %in% c("C33", "C34") ~ 4.53,
sex_grp == 1 & diag == "C53" ~ 1, #Cervical cancers
sex_grp == 2 & diag == "C53" ~ 1.14,
sex_grp == 1 & diag %in% c("C64", "C65", "C66", "C68") ~ 1.70, #Kidney, renal pelvis cancers
sex_grp == 2 & diag %in% c("C64", "C65", "C66", "C68") ~ 1.10,
sex_grp == 1 & diag == "C67" ~ 2.09, #Bladder cancers
sex_grp == 2 & diag == "C67" ~ 1.89,
sex_grp == 1 & diag == "C80" ~ 2.50, #Unspecified site cancers
sex_grp == 2 & diag == "C80" ~ 1.30,
sex_grp == 1 & diag == "C92" ~ 1.40, #Myeloid leukaemia
sex_grp == 2 & diag == "C92" ~ 1.30,
# Age-specific risks: one rule per sex and age band
sex_grp == 1 & diag >= "I20" & diag <= "I25" & age_grp <= 11 ~ 2.00, #Ischaemic heart disease
sex_grp == 1 & diag >= "I20" & diag <= "I25" & age_grp %in% c(12, 13) ~ 1.60,
sex_grp == 1 & diag >= "I20" & diag <= "I25" & age_grp %in% c(14, 15) ~ 1.30,
sex_grp == 1 & diag >= "I20" & diag <= "I25" & age_grp >= 16 ~ 1.10,
sex_grp == 2 & diag >= "I20" & diag <= "I25" & age_grp <= 11 ~ 2.60,
sex_grp == 2 & diag >= "I20" & diag <= "I25" & age_grp %in% c(12, 13) ~ 1.10,
sex_grp == 2 & diag >= "I20" & diag <= "I25" & age_grp %in% c(14, 15) ~ 1.20,
sex_grp == 2 & diag >= "I20" & diag <= "I25" & age_grp >= 16 ~ 1.20,
sex_grp == 1 & ((diag >= "I00" & diag <= "I09") | #Other heart disease
(diag >= "I26" & diag <= "I51")) ~ 1.22,
sex_grp == 2 & ((diag >= "I00" & diag <= "I09") |
(diag >= "I26" & diag <= "I51")) ~ 1.14,
# For sex_grp 1 the ex-smoker cerebrovascular risk is a single value for all age groups
sex_grp == 1 & diag >= "I60" & diag <= "I69" ~ 1.10, #Cerebrovascular disease
sex_grp == 2 & diag >= "I60" & diag <= "I69" & age_grp <= 15 ~ 1.30,
sex_grp == 2 & diag >= "I60" & diag <= "I69" & age_grp >= 16 ~ 1,
sex_grp == 1 & diag == "I70" ~ 1.33, #Atherosclerosis.
sex_grp == 2 & diag == "I70" ~ 1,
sex_grp == 1 & diag == "I71" ~ 3.07, #Aortic aneurysm
sex_grp == 2 & diag == "I71" ~ 2.07,
sex_grp == 1 & diag >= "I72" & diag <= "I78" ~ 1.01, #Other arterial disease
sex_grp == 2 & diag >= "I72" & diag <= "I78" ~ 1.12,
sex_grp == 1 & diag >= "J10" & diag <= "J18"~ 1.40, #Pneumonia, influenza
sex_grp == 2 & diag >= "J10" & diag <= "J18" ~ 1.10,
sex_grp == 1 & diag %in% c("J40", "J41", "J42", "J43") ~ 15.64, #Chronic obstructive lung disease
sex_grp == 2 & diag %in% c("J40", "J41", "J42", "J43") ~ 11.77,
sex_grp == 1 & diag == "J44" ~ 6.80, #Chronic airway obstruction
sex_grp == 2 & diag == "J44" ~ 6.78,
sex_grp == 1 & diag %in% c("K25", "K26", "K27") ~ 1.80, #Stomach / duodenal ulcer
sex_grp == 2 & diag %in% c("K25", "K26", "K27") ~ 1.40,
# Records matching none of the rules above get 0
TRUE ~ 0))
###############################################.
## Part 4 - Aggregating geographic areas ----
###############################################.
# Counts deaths for every geography a record belongs to.
# Each death carries three geography codes (council area, health board and
# Scotland); stacking them into a single `code` column means one death is
# counted once under each of its areas.
smoking_deaths %<>%
  # Drop records that matched no relative-risk rule in Part 3 (current == 0).
  filter(current > 0) %>%
  # One row per death per geography level. pivot_longer() replaces the
  # superseded gather(); the level name itself is not needed afterwards.
  pivot_longer(cols = c(ca2019, hb2019, scotland),
               names_to = "geolevel", values_to = "code") %>%
  select(-geolevel) %>%
  # n = number of deaths per area, year, sex, age group and risk pair.
  count(code, year, sex_grp, age_grp, current, ex)

# Intermediate copy of the aggregated deaths file
saveRDS(smoking_deaths, file = paste0(data_folder, 'Temporary/smoking_deaths_part3.rds'))
###############################################.
# Attach both prevalence series to the deaths basefile:
#   1. area prevalence, matched on area code, year and sex
#   2. age prevalence, matched on age band, year and sex
# In between, the age groups are collapsed into the age_grp2 bands used by the
# prevalence by age file (8-11 -> 2, 12-13 -> 3, 14-15 -> 4, 16 and over -> 5).
smoking_deaths <- smoking_deaths %>%
  left_join(smok_prev_area, by = c("code", "year", "sex_grp")) %>%
  mutate(
    age_grp = as.numeric(age_grp),
    age_grp2 = case_when(
      between(age_grp, 8, 11) ~ 2,
      between(age_grp, 12, 13) ~ 3,
      between(age_grp, 14, 15) ~ 4,
      age_grp >= 16 ~ 5
    )
  ) %>%
  left_join(smok_prev_age, by = c("age_grp2", "year", "sex_grp"))
###############################################.
## Part 5 - Calculate smoking attributable fractions ----
###############################################.
# Estimate age, sex and area specific prevalence with the Public Health England
# formula (dividing by 100 turns percentages into proportions), turn it into a
# smoking attributable fraction (SAF) and apply the SAF to the death counts.
smoking_deaths <- smoking_deaths %>%
  mutate(
    # current and ex smoker prevalence specific to area, age and sex group
    prev_current = (current_area / scot_current) * current_age / 100,
    prev_ex = (ex_area / scot_ex) * ex_age / 100,
    # excess risk carried by each smoking status
    excess_current = prev_current * (current - 1),
    excess_ex = prev_ex * (ex - 1),
    # smoking attributable fraction
    saf = (excess_current + excess_ex) / (1 + excess_current + excess_ex),
    # deaths attributable to smoking
    numerator = n * saf
  ) %>%
  # total attributable deaths per area, year, sex and age group
  group_by(code, year, sex_grp, age_grp) %>%
  summarise(numerator = sum(numerator)) %>%
  ungroup() %>%
  # only years before 2022 are kept in the output
  filter(year < 2022)

saveRDS(smoking_deaths, file = paste0(data_folder, 'Prepared Data/smoking_deaths_raw.rds'))
###############################################.
## Part 6 - Run analysis functions ----
###############################################.
# Smoking attributable deaths: standardised rates, 2 year aggregation.
# analyze_first() and analyze_second() are the standard indicator functions
# loaded from 1.indicator_analysis.R at the top of this script.
analyze_first(filename = "smoking_deaths", measure = "stdrate", geography = "all",
              pop = "CA_pop_allages", yearstart = 2012, yearend = 2021,
              time_agg = 2, epop_age = "normal")

analyze_second(filename = "smoking_deaths", measure = "stdrate", time_agg = 2,
               epop_total = 120000, ind_id = 20201, year_type = "calendar")

# Rounding figures - they are estimates and rounding helps to understand that
# they are not precise
data_shiny <- readRDS(file = paste0(data_folder, "Data to be checked/smoking_deaths_shiny.rds")) %>%
  mutate(numerator = round(numerator, -1)) %>% # to nearest 10
  mutate(across(all_of(c("rate", "lowci", "upci")), ~ round(.x, 0))) # no decimals

# Save the rounded data in both formats. The csv must have its own .csv path:
# writing it to the .rds path would overwrite the RDS file saved just before it
# with csv text, leaving a file that readRDS() cannot open.
saveRDS(data_shiny, file = paste0(data_folder, "Data to be checked/smoking_deaths_shiny.rds"))
write_csv(data_shiny, file = paste0(data_folder, "Data to be checked/smoking_deaths_shiny.csv"))
##END