-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathcreate vulnerability index - primary care networks.r
141 lines (109 loc) · 5.43 KB
/
create vulnerability index - primary care networks.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
##
## Aggregate VI into Primary Care Networks (PCNs)
##
library(tidyverse)
library(readxl)
library(sf)
library(janitor)
source("functions.r")
# ---- Build Primary Care Network boundaries ----
# PCNs don't have boundaries as such, so approximate each one by merging the
# catchment areas submitted by its member GP practices.

# Earlier data source, kept for reference:
# GP Practice Submitted Inner Catchment Area (2019_03_KML_Data)
# Source: https://data.england.nhs.uk/dataset/gp-practice-submitted-inner-catchment-area-kml-file
# gp_shp = read_sf("data/boundaries/GP_Catchment_Area.kml")

# Load GP submitted inner catchment areas - Jan-Mar 2020
# Source: https://digital.nhs.uk/data-and-information/data-collections-and-data-sets/data-collections/general-practice-data-collections
gp_shp <- read_sf("https://files.digital.nhs.uk/assets/eDEC/eDecJan-Mar2020.kml")

# Some GPs have more than one polygon - merge them into one geometry per practice
gp_shp <- gp_shp %>%
  group_by(Name) %>%
  summarise()

# Split `Name` column into Codes and Names.
# extra = "merge" splits on the first " - " only, so a practice name that
# itself contains " - " is kept whole rather than truncated with a warning.
gp_shp <- gp_shp %>%
  separate(Name, c("Code", "Name"), sep = " - ", extra = "merge")

# Load GP to PCN lookup
# Source: `epcn` file from https://digital.nhs.uk/services/organisation-data-service/data-downloads/gp-and-gp-practice-related-data
gp_pcn <- read_excel("data/primary-care-networks/ePCN.xlsx", sheet = "PCN Core Partner Details")

# The spreadsheet's column headers contain embedded line breaks (hence the
# \r\n in the names); rename them to something usable
gp_pcn <- gp_pcn %>%
  select(
    GP_Code = `Partner\r\nOrganisation\r\nCode`,
    GP_Name = `Partner\r\nName`,
    PCN_Code = `PCN Code`,
    PCN_Name = `PCN Name`
  )

# Attach PCN details to each practice catchment and drop practices that have
# no PCN in the lookup
pcn_shp_all <- gp_shp %>%
  left_join(gp_pcn, by = c("Code" = "GP_Code")) %>%
  filter(!is.na(PCN_Code))

# Merge member-practice catchments into a single geometry per PCN.
# summarise() only removes the last grouping variable, so without ungroup()
# the result would stay grouped by PCN_Code and any later per-row operation
# (e.g. row_number()) would silently run within each PCN instead of overall.
pcn_shp <- pcn_shp_all %>%
  group_by(PCN_Code, PCN_Name) %>%
  summarise() %>%
  ungroup()

write_sf(pcn_shp, "data/boundaries/Primary_Care_Networks.shp")
write_sf(pcn_shp, "data/boundaries/Primary_Care_Networks.geojson")
# TO DO:
# - Some parts of some MSOAs might be in multiple PCNs and a PCN contains multiple (parts of) MSOAs...
# - ... so calculate population-weighted vulnerability based on proportion of population in each PCN
#

# ---- Create lookup for MSOAs to PCNs based on centroids ----
# Middle Layer Super Output Areas (December 2011) Population Weighted Centroids
# Source: https://geoportal.statistics.gov.uk/datasets/middle-layer-super-output-areas-december-2011-population-weighted-centroids
# Centroids are reprojected to EPSG:4326 before being joined to the PCN polygons
msoa_centroids <- st_transform(
  read_sf("https://opendata.arcgis.com/datasets/b0a6d8a3dc5d4718b3fd62c548d60f81_0.geojson"),
  crs = 4326
)

# Spatial join: tag each MSOA centroid with the PCN polygon(s) it falls in,
# then strip the geometry and keep just the two code columns
msoa_pcn <- select(
  st_drop_geometry(st_join(msoa_centroids, pcn_shp)),
  MSOA11CD = msoa11cd,
  PCN_Code
)

# Save lookup
write_csv(msoa_pcn, "data/lookup msoa to primary care network.csv")
# ---- Create lookup based on overlapping polygons ----
# Middle Layer Super Output Areas (December 2011) Boundaries EW BGC
# Source: https://geoportal.statistics.gov.uk/datasets/middle-layer-super-output-areas-december-2011-boundaries-ew-bgc
msoa_shp <- read_sf("https://opendata.arcgis.com/datasets/1e6f162967de4f3da92040761e8464d8_0.geojson") %>%
  st_transform(crs = 4326)

# Give every PCN a sequential ID (in PCN_Code order) and keep only ID + code.
# `pcn_shp` may still be grouped by PCN_Code (summarise() only drops the last
# grouping variable); on grouped data row_number() restarts within each group,
# which would make PCN_ID equal to 1 for every PCN - so ungroup() first.
pcn_shp_tmp <- pcn_shp %>%
  ungroup() %>%
  arrange(PCN_Code) %>%
  mutate(PCN_ID = row_number()) %>%
  select(PCN_ID, PCN_Code)

# Same for MSOAs: sequential ID in MSOA11CD order
msoa_shp_tmp <- msoa_shp %>%
  arrange(MSOA11CD) %>%
  mutate(MSOA_ID = row_number()) %>%
  select(MSOA_ID, MSOA11CD)

# One row per (PCN, overlapping MSOA) pair, with geometry removed.
# An earlier st_intersects() index table assigned to `pcn_msoa` was dropped
# here: it was overwritten by this join before ever being used.
pcn_msoa <- pcn_shp_tmp %>%
  st_join(msoa_shp_tmp) %>%
  st_drop_geometry()

write_csv(pcn_msoa, "data/lookup primary care network to msoas.csv")
# ---- Calculate proportion of 20% most vulnerable MSOAs that are in/overlap PCN boundaries ----

# Proportion of the MSOAs in/overlapping each PCN whose decile is 9 or above
# (i.e. the 20% most vulnerable) on one vulnerability measure.
#
# vi         : MSOA-level vulnerability data; must contain `Code` and the decile column
# lookup     : PCN-to-MSOA lookup; must contain `PCN_Code` and `MSOA11CD`
# decile_col : unquoted name of the decile column in `vi`
#
# Returns a data frame with one row per PCN and columns `PCN_Code`, `Proportion`.
calc_pcn_top20_proportion <- function(vi, lookup, decile_col) {
  vi %>%
    select(MSOA11CD = Code, Decile = {{ decile_col }}) %>%
    left_join(lookup, by = "MSOA11CD") %>%
    # MSOAs that match no PCN would otherwise be tallied into a meaningless `NA` PCN row
    filter(!is.na(PCN_Code)) %>%
    # label MSOAs by whether they're in top 20% most-vulnerable then summarise by this label
    mutate(Top20 = ifelse(Decile >= 9, "Top20", "Other")) %>%
    tabyl(PCN_Code, Top20) %>%
    # calculate proportion of most vulnerable MSOAs
    mutate(Proportion = Top20 / (Top20 + Other)) %>%
    select(PCN_Code, Proportion)
}

# - Load data -
vi_msoa <- read_csv("output/vulnerability-MSOA-England.csv") # run "create vulnerability index - MSOA.r" to make this dataset
pcn_msoa <- read_csv("data/lookup primary care network to msoas.csv")
# pop = read_csv("data/population estimates msoa11 lad17 lad19 tacticall cell.csv")

# - calculate proportions of highly vulnerable MSOAs in or overlapping each PCN -
vi_pcn_clinical <- vi_msoa %>%
  calc_pcn_top20_proportion(pcn_msoa, `Clinical Vulnerability decile`) %>%
  select(PCN_Code, `Proportion of highly clinically vulnerable neighbourhoods` = Proportion)

# ... Same for socioeconomic vulnerability
vi_pcn_ses <- vi_msoa %>%
  calc_pcn_top20_proportion(pcn_msoa, `Socioeconomic Vulnerability decile`) %>%
  select(PCN_Code, `Proportion of highly socioeconomically vulnerable neighbourhoods` = Proportion)

# Combine both measures into one row per PCN and save
vi_pcn_clinical %>%
  left_join(vi_pcn_ses, by = "PCN_Code") %>%
  write_csv("output/vulnerability-PCN.csv")
# Look at relationship between clinical and socioeconomic vulnerability
# vi_msoa %>%
# ggplot(aes(x = `Socioeconomic Vulnerability rank`, y = `Clinical Vulnerability rank`)) +
# geom_point(alpha = 0.3) +
# geom_smooth(method = "lm")