-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy path09_Costing_outpatient_visits.R
232 lines (165 loc) · 13.2 KB
/
09_Costing_outpatient_visits.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Project: English results for the International Collaborative on Costs, Outcomes & Needs in Care (ICCONIC)
# Purpose: Process outpatient data and use HRG grouper to create indicative costs for attendances
# Author: Dr Will Parry
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#Setup - load packages, source folder locations ----
pkgs <- c('tidyverse', 'readxl', 'here')
sapply(pkgs, require, character.only = TRUE)
#__________________________________
# Prepare data for HRG grouper ----
HRGyears <- 2013:2017 #set study years for which we want outpatient data
cohorts <- readRDS(here('../Data', 'cohorts.rds')) #read in cohort data
OPapps <- readRDS(here('../Data', 'hesop_app.rds')) %>% #read in outpatient appointments data
inner_join(cohorts) %>% #join on cohort information
filter(is.na(dod) | (apptdate <= dod)) %>% #check dates are not after death
filter(HES_yr %in% HRGyears) %>% #filter out study years
filter(!attended %in% c(0, 3)) %>% #remove DNAs and future appointments
mutate(heartpre = apptdate >= (heart - 365) & apptdate < heart, #add indicators for cohort pre/post relevance...
heartpost = apptdate < (heart + 365) & apptdate >= heart,
hippre = apptdate >= (hip - 365) & apptdate < hip,
hippost = apptdate < (hip + 365) & apptdate >= hip,
dempre = apptdate >= (dementia - 365) & apptdate < dementia,
dempost = apptdate < (dementia + 365) & apptdate >= dementia) %>%
filter(heartpre | heartpost | hippre | hippost | dempre | dempost) %>% #filter out rows with TRUE in pre/post indicators
left_join(readRDS(here('../Data', 'hesop_clin_wp.rds'))) #join on clinical information
group_by(OPapps, stafftyp) %>% tally() #check counts of staff types - lots of 99 (NOT KNOWN)
dat <- OPapps %>% mutate(dummy = paste0(patid, '-', attendkey)) %>% #add an ID variable to the data
select(startage = apptage, sex = gender, mainspef, tretspef, firstatt, #restrict to fields required for the HRG grouper
opertn_01:opertn_12, dummy, HES_yr) %>% #...keeping the dummy ID and HES_yr columns
mutate(opertn_01 = ifelse(opertn_01 == '-', NA, opertn_01)) %>% #remove any '-' values in the opertn_01 field
rename_at(vars(starts_with('opertn')), list(~sub('opertn', 'oper', .))) #rename opertn variables to 'oper'
names(dat) <- toupper(names(dat)) #make names all upper case for the HRG grouper
#Write HRG grouper input files ----
dat %>% group_by(HES_YR) %>% #group by HES_yr
do(write_csv(., here('../Data', paste0('OP_HRG_grouper_input_', unique(.$HES_YR),'.csv')), na = '')) #write to csv (by HES_yr)
#Write RDF file for HRG grouper ----
nms <- names(dat)
rdf <- c('COMMA DELIMITED\r', paste(nms, nms, 1:length(nms), '', '', '', '\r', sep = ','))
write_lines(rdf, here('../Data', 'OP.rdf'))
#[WP: At this point I ran all of the HRG groupers for each of the years of data from 2013/14 to 2017/18]
#Performance of HRG groupers
#2013: ~ 1% errors
#2014: ~ 4% errors
#2015: ~12% errors - PRETTY BAD!
#2016: ~10% errors - PRETTY BAD!
#2017: ~ 8% errors - PRETTY BAD!
#Check errors in 2015, which was particularly bad
# errors2015 <- read_csv(here('../Data', 'HRG_grouper_output', 'OP_2015_quality.csv')) %>% group_by(`Error Message`) %>%
# tally() %>% arrange(desc(n))
# View(errors2015)
#___________________________________________
#Read in HRG grouper output files ----
#Store names of HRG output columns in a vector
col_nms <- c('startage','sex','mainspef','tretspef','firstatt','oper_01','oper_02','oper_03','oper_04','oper_05','oper_06','oper_07',
'oper_08','oper_09','oper_10','oper_11','oper_12','dummy','HES_yr','RowNo','NAC_HRG','GroupingMethodFlag',
'DominantProcedure','AttendanceHRG','AttendSSC_Ct','AttendSSC1','AttendSSC2','AttendSSC3','AttendSSC4','AttendSSC5',
'AttendBP_Ct','AttendBP1','AttendBP2','AttendBP3','AttendBP4','AttendBP5','AttendFlag_Ct','AttendFlag1','AttendFlag2',
'AttendFlag3','AttendFlag4','AttendFlag5')
ub_nms <- paste0('UnbundledHRGs', 1:25) #create 25 unbundled HRG column names (HRG grouper does not create headers for these in outputs)
cols <- c(col_nms, ub_nms) #add unbundled column names to HRG output column names
coltypes <- paste(c('iiici', rep('c', 12), c('ciiccc'), rep('i', 19), rep('c', 25)), collapse = '') #set the types for the columns and store as vector
#List HRG grouper spell output files and read in
files <- list.files(here('../Data', 'HRG_grouper_output'), pattern = 'OP_...._attend.csv', full.names = TRUE) #list HRG output files for attendances
years <- str_extract(files, '_...._') %>% str_sub(2,5) #get HRG years
#NB: ignore warnings on running read_csv; this is just because unbundled columns often do not have much data and no headers
HRGoutput <- files %>% map(read_csv, skip = 1, col_types = coltypes, col_names = cols) #read in HRG grouper output (spell level files) replacing the column names
names(HRGoutput) <- years #name the datasets using their years
HRGoutput <- HRGoutput %>% bind_rows(.id = 'HRGyear') #bind each year of data together
saveRDS(HRGoutput, here('../Data', 'OP_HRGoutput.rds')) #save to file
#________________________________
#Cost the grouped OP spells ----
HRGoutput <- readRDS(here('../Data', 'OP_HRGoutput.rds')) %>% #read in the HRG grouper output RDS file created above
separate(dummy, c('patid', 'attendkey'), '-') %>% #create separate ID fields (patid, attendkey) from the dummy column
mutate(patid = as.numeric(patid), attendkey = as.numeric(attendkey)) %>% #format them as numeric (were character)
select(HRGyear, patid, attendkey, tretspef, NAC_HRG, GroupingMethodFlag, UnbundledHRGs1:UnbundledHRGs25) #select required fields
HRGcosts <- readRDS(here('Reference_data', 'HRG_reference_costs', 'HRGcosts.rds')) #read in HRG costs file
mainHRGcosts <- HRGcosts %>% filter(type %in% c('CL', 'NCL', 'OPROC')) %>% #filter out outpatient-relevant costs
pivot_wider(id_cols = c(HRGyear, HRG, servicecode), names_from = type, values_from = unitcost) #make wide format
#stafftyp: 3 = lead care professional, 4 = member of professional care team, 99 = unknown
#NB: where stafftyp is 99 we've assumed consultant/lead care professional
#GroupingMethodFlag: O=outpatient default, P=procedure driven, G=global exception, U=Error
OPmain <- OPapps %>% select(patid, attendkey, stafftyp) %>% #get stafftyp from OPapps
left_join(HRGoutput, by = c('patid', 'attendkey')) %>% #join HRG output onto these
filter(GroupingMethodFlag != 'U') %>% #remove failed groupings
left_join(mainHRGcosts, by = c('NAC_HRG' = 'HRG', 'HRGyear', 'tretspef' = 'servicecode')) %>% #join the main HRG costs onto the HRG output
mutate(across(c(NCL, CL, OPROC), ~ replace_na(.x, 0))) %>% #change any NA values to zero
mutate(staffcost = ifelse(stafftyp == 4, NCL, CL), #if staff is not a consultant/lead care professional, use NCL cost
cost = staffcost + OPROC) #sum staff based cost and OPROC (there is only ever one or the other)
#Dealing with unbundled costs ----
# Unbundled costs that are included:
# CHEM - Outpatient
# IMAG - Outpatient
# NM - Outpatient
# HCD - Outpatient
# RAD - Outpatient
# SPC - Outpatient
#Get the unbundled HRG grouper output in long format
unbundled <- HRGoutput %>% select(HRGyear, patid, attendkey, UnbundledHRGs1:UnbundledHRGs25) %>% #select unbundled HRGs
pivot_longer(cols = starts_with('UnbundledHRGs'), values_to = 'HRGcode', values_drop_na = TRUE) %>% #make dataset long
separate(HRGcode, c('HRG', 'multiplier'), sep = '\\*', fill = 'right') %>% #separate out the multiplier into another column
mutate(multiplier = ifelse(is.na(multiplier), 1, multiplier), #ensure every row has a multiplier of at least 1
multiplier = as.numeric(str_sub(multiplier, 1, 1))) #assume typos, where higher than 10 (debateable)
HRGcosts_unbundled <- HRGcosts %>% #get HRG costs
filter(type %in% c('CHEM', 'IMAG', 'NM', 'HCD', 'RAD', 'SPC') & str_detect(servicedescription, 'Outpat')) %>% #filter out outpatient unbundled types
right_join(unbundled) %>% #join onto the unbundled grouper output
mutate(ubcost = unitcost * multiplier) %>% #multiply unit cost by unbundled multiplier
group_by(HRGyear, patid, attendkey) %>% #group by the year, patid and attendance ID and...
summarise(ubcost = sum(ubcost, na.rm = TRUE)) #sum total unbundled costs by these groups
#Cost the outpatient attendances ----
deflators <- readRDS(here('Reference_data', 'deflators.rds'))
OPcosts <- OPapps %>% select(HES_yr, patid, attendkey, apptdate, gender, dod, hip, hip_cat, heart, dementia, #get required columns of outpatient appointments
heartpre:dempost, hip_agegroup, heart_agegroup, dem_agegroup, mainspef) %>%
left_join(OPmain) %>% left_join(HRGcosts_unbundled) %>% left_join(deflators) %>% #join on the main HRG costs, the unbundled costs and the deflators
mutate(across(c(ubcost, cost), ~ replace_na(.x, 0))) %>% #change any NA values to zeroes
mutate(totcost = (ubcost + cost) * def_mult) %>% #calculate total cost (2016/17 prices) of attendance by summing and multiplying by deflator
select(patid:GroupingMethodFlag, totcost) #reduce the dataset size
saveRDS(OPcosts, here('../Data', 'OPcosts.rds'))
#_________________________
#Create summary stats ----
cohorts <- readRDS(here('../Data', 'cohorts.rds')) %>% filter(inhip | inheart | indem) #filter out only the valid cohorts
OPbuckets <- read_excel(here('Reference_data', 'Outpatient_specialty_buckets.xlsx')) #read in the categorised specialties (based on 'mainspef')
OPcosts <- readRDS(here('../Data', 'OPcosts.rds')) %>% left_join(OPbuckets) %>% inner_join(cohorts %>% select(patid, inhip:indem)) #join everything together
source(here('R', 'Output_functions.R')) #source the output functions for creating summary statistics as required by the ICCONIC team
#Set cohorts to use
hipcats <- c('hip_op_total','hip_op_partial','hip_op_osteopin') #hip cats to include
hip_cohort <- cohorts %>% filter(inhip, hip_cat %in% hipcats)
heartd_cohort <- cohorts %>% filter(inheartd)
heart_cohort <- cohorts %>% filter(inheart)
heartdc_cohort <- cohorts %>% filter(inheartdc)
heartnodc_cohort <- cohorts %>% filter(inheartnodc)
#Hip cohort
hipstats1 <- stats_func(hip_cohort, OPcosts, 'totcost', 'hippost', 'hip_agegroup', hipcats, extra_cat = 'category') #create summary statistics by categorised specialties
write_csv(hipstats1, here('../Outputs', 'hip_op_stats1.csv'), na = '0') #write to file
hipstats1 <- stats_func(hip_cohort, OPcosts, 'totcost', 'hippost', 'hip_agegroup', hipcats) #create summary statistics
write_csv(hipstats2, here('../Outputs', 'hip_op_stats2.csv'), na = '0') #write to file
#Heart cohort
heartstats1 <- stats_func(heartd_cohort, OPcosts, 'totcost', 'heartpost', 'heart_agegroup', hip_cats = NULL, extra_cat = 'category') #create summary statistics by categorised specialties
write_csv(heartstats1, here('../Outputs', 'heart_op_stats1.csv'), na = '0') #write to file
heartstats2 <- stats_func(heartd_cohort, OPcosts, 'totcost', 'heartpost', 'heart_agegroup') #create summary statistics
write_csv(heartstats2, here('../Outputs', 'heart_op_stats2.csv'), na = '0') #write to file
heartstats3 <- stats_func(heart_cohort, OPcosts, 'totcost', 'heartpost', 'heart_agegroup') #create summary statistics
write_csv(heartstats3, here('../Outputs', 'heart_op_stats3.csv'), na = '0') #write to file
heartstats4 <- stats_func(heartdc_cohort, OPcosts, 'totcost', 'heartpost', 'heart_agegroup') #create summary statistics
write_csv(heartstats4, here('../Outputs', 'heart_op_stats4.csv'), na = '0')
heartstats5 <- stats_func(heartnodc_cohort, OPcosts, 'totcost', 'heartpost', 'heart_agegroup') #create summary statistics
write_csv(heartstats5, here('../Outputs', 'heart_op_stats5.csv'), na = '0') #write to file
#__________________
#Lookback year ----
#Hip cohort
hipstats1 <- stats_func(hip_cohort, OPcosts, 'totcost', 'hippre', 'hip_agegroup', hipcats, extra_cat = 'category') #create summary statistics by categorised specialties
write_csv(hipstats1, here('../Outputs', 'lookback_hip_op_stats1.csv'), na = '0') #write to file
hipstats2 <- stats_func(hip_cohort, OPcosts, 'totcost', 'hippre', 'hip_agegroup', hipcats) #create summary statistics
write_csv(hipstats2, here('../Outputs', 'lookback_hip_op_stats2.csv'), na = '0') #write to file
#Heart cohort
heartstats1 <- stats_func(heartd_cohort, OPcosts, 'totcost', 'heartpre', 'heart_agegroup', hip_cats = NULL, extra_cat = 'category') #create summary statistics by categorised specialties
write_csv(heartstats1, here('../Outputs', 'lookback_heart_op_stats1.csv'), na = '0') #write to file
heartstats2 <- stats_func(heartd_cohort, OPcosts, 'totcost', 'heartpre', 'heart_agegroup') #create summary statistics
write_csv(heartstats2, here('../Outputs', 'lookback_heart_op_stats2.csv'), na = '0') #write to file
#_______________________________________________________
#Additional work on cumulative statistics by decile ----
x <- OPcosts %>% filter(category %in% c('1_MD specialist', '2_Outpatient Diagnostic MD'))
dec_hipall_opcosts <- decile_func(hip_cohort, x, 'hippost', 'totcost')
dec_heartd_opcosts <- decile_func(heartd_cohort, x, 'heartpost', 'totcost')
rbind(dec_hipall_opcosts, dec_heartd_opcosts) %>% #bind the results together
write_csv(here('../Outputs', 'op_costs_deciles.csv')) #write to file