-
Notifications
You must be signed in to change notification settings - Fork 0
/
helper_ecdf.R
188 lines (162 loc) · 7.07 KB
/
helper_ecdf.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
# TO DO: Standardize so all output has only the value of interest for null_distributions
# REQUIRES: A null_distributions list; each element is itself a list of items,
#           and each item has a `Q` entry holding one value per cell
#           (presumably Rao's Q / FunRao output - confirm against the producer)
# MODIFIES: N/A
# EFFECTS: Outputs a list named "1", "2", ... (position in null_distributions);
#          each entry is a list named by cell index ("1", "2", ...) holding the
#          Q values collected for that cell across all items
get_cell_distributions = function(null_distributions) {
  # Accumulate into a local list and return it. The original assigned into an
  # undefined `cell_distributions_1` and returned nothing.
  cell_distributions <- list()
  for (i in seq_along(null_distributions)) {
    items <- null_distributions[[i]]
    # Number of cells is read from the first entry of the first item
    cell_num <- if (length(items) > 0) length(items[[1]][[1]]) else 0
    # Temporary list to store the values for the current overarching item,
    # with one (initially empty) vector per cell label
    temp_results <- list()
    for (j in seq_len(cell_num)) {
      temp_results[[as.character(j)]] <- vector()
    }
    # Iterate over each item within the current overarching item
    for (item in items) {
      val_list <- item$Q
      # Append each cell's value to that cell's running vector
      for (label in seq_along(val_list)) {
        key <- as.character(label)
        temp_results[[key]] <- c(temp_results[[key]], val_list[[label]])
      }
    }
    # Store the compiled values for the current overarching item
    cell_distributions[[as.character(i)]] <- temp_results
  }
  return(cell_distributions)
}
# REQUIRES: A cell_distributions object, with each of n cells being a list of values or NA
# REQUIRES: A number of cores >= 1
# MODIFIES: N/A
# EFFECTS: Outputs a list of ecdf functions, one for each distribution in cell_distributions;
#          a cell whose values are all NA (or empty) gets NA instead of a function
# EFFECTS: Output keeps the names of cell_distributions; empty input gives an empty list
get_ecdf_list = function(cell_distributions, cores=1) {
  # Cores must be >= 1
  if (cores < 1) {
    stop("Invalid core number for get_ecdf_list - must be >= 1")
  }
  # Shared worker for the serial and parallel paths
  build_ecdf <- function(cell_num) {
    row_data <- unlist(cell_distributions[cell_num])
    if (all(is.na(row_data))) {
      return(NA)
    }
    # Remove NAs and compute the ECDF
    ecdf(na.omit(row_data))
  }
  # seq_along() gives an empty index for empty input, where 1:length() gave c(1, 0)
  cell_indices <- seq_along(cell_distributions)
  if (cores == 1) {
    ecdf_list <- lapply(cell_indices, build_ecdf)
  } else {
    ecdf_list <- parallel::mclapply(cell_indices, build_ecdf, mc.cores = cores)
  }
  # Retain site index info
  names(ecdf_list) <- names(cell_distributions)
  return(ecdf_list)
}
# REQUIRES: A cell_distributions_list object, which is a list of lists of
#           distributions for each cell
# REQUIRES: A number of cores >= 1
# MODIFIES: N/A
# EFFECTS: Outputs a list of lists of ecdf functions, one for each distribution
#          in each list of cell_distributions (built with get_ecdf_list);
#          empty input gives an empty list
get_ecdf_lists = function(cell_distributions_list, cores=1) {
  # Cores must be >= 1
  if (cores < 1) {
    stop("Invalid core number for get_ecdf_lists - must be >= 1")
  }
  # seq_along() gives an empty index for empty input, where 1:length() gave c(1, 0)
  list_indices <- seq_along(cell_distributions_list)
  if (cores == 1) {
    ecdfs_list <- lapply(list_indices, function(list_index) {
      get_ecdf_list(cell_distributions_list[[list_index]])
    })
  } else {
    # NOTE(review): `cores` is passed to both the outer mclapply and the inner
    # get_ecdf_list call, so workers may be nested - confirm this is intended.
    ecdfs_list <- parallel::mclapply(list_indices, function(list_index) {
      get_ecdf_list(cell_distributions_list[[list_index]], cores = cores)
    }, mc.cores = cores)
  }
  return(ecdfs_list)
}
# REQUIRES: A named list of ecdf functions (entries may be NA where no ecdf exists)
# REQUIRES: A collection of observed values, matched to the ecdfs by name
# MODIFIES: N/A
# EFFECTS: For each ecdf function, plugs in corresponding observed value - ECDF(observed_val)
# EFFECTS: Outputs a list of ECDF values for each cell, named like ecdf_list;
#          a cell gets NA when it has no ecdf function or no observed value under its name
get_ecdf_vals = function(ecdf_list, observed_vals) {
  cell_names <- names(ecdf_list)
  # seq_along() keeps empty input from indexing out of bounds
  output_list <- lapply(seq_along(ecdf_list), function(ecdf_index) {
    ecdf_func <- ecdf_list[[ecdf_index]]
    # Missing ecdfs are stored as NA. is.function() is used because is.na()
    # on a closure emits a warning for every real ecdf.
    if (!is.function(ecdf_func)) {
      return(NA)
    }
    cell_name <- cell_names[ecdf_index]
    # If there's no observed value for this cell, set the output to NA
    if (!isTRUE(cell_name %in% names(observed_vals))) {
      return(NA)
    }
    # Evaluate the ECDF at the observed value: the share of the distribution
    # that is less than or equal to it
    ecdf_func(observed_vals[[cell_name]])
  })
  names(output_list) <- cell_names
  return(output_list)
}
# REQUIRES: A resolution, used in the .tif file name
# REQUIRES: A directory holding the .tif file
# REQUIRES: A month, used in the .tif file name
# MODIFIES: N/A
# EFFECTS: Reads <location>/<month>_stack_<res>.tif with rast() and outputs
#          the vector c(nrow, ncol) of that raster's dimensions
get_dims_from_tif = function(res=1, location="./data", month="jul") {
  # Assemble the path to the month's stack file
  tif_path <- paste0(location, "/", month, "_stack_", as.character(res), ".tif")
  # rast() is presumably terra::rast - confirm against the file's library() calls
  tif_raster <- rast(tif_path)
  n_rows <- nrow(tif_raster)
  n_cols <- ncol(tif_raster)
  c(n_rows, n_cols)
}
# REQUIRES: A list of ecdf_vals (or really any values of equal length to the raster being made)
# REQUIRES: Dims = c(nrow, ncol) for the raster; if NA, tries to get dims using month
# REQUIRES: A resolution for the raster, used only when dims are looked up from the .tif
# REQUIRES: An extent for the raster, as c(xmin, xmax, ymin, ymax); also sets the map window
# REQUIRES: A month that the raster is representing, used to get dims; if both dims and
#           month are NA, stops with an error
# REQUIRES: A plot label
# MODIFIES: N/A
# EFFECTS: Outputs a plot of the input values on a raster of the given size, laid over a
#          North America map
plot_ecdf = function(ecdf_vals, dims=NA, res=1, ext=c(-92.5, -67.5, 38, 48), month="jul",
                     plot_label="Flowers") {
  # dims is documented as c(nrow, ncol), so is.na(dims) can have length 2.
  # Reduce it with all() because if() errors on a length > 1 condition in R >= 4.2.
  dims_missing <- is.null(dims) || all(is.na(dims))
  if (dims_missing) {
    if (is.null(month) || is.na(month)) {
      stop("No dimensions supplied for plot_ecdf()!")
    }
    dims <- get_dims_from_tif(res=res, month=month)
  }
  map <- ne_countries(scale='medium', type='map_units', returnclass='sf',
                      continent=c("North America")) # Base map
  raster <- rast(nrow=dims[1], ncol=dims[2], xmin=ext[1], xmax=ext[2],
                 ymin=ext[3], ymax=ext[4])
  values(raster) <- unlist(ecdf_vals)
  # Convert SpatRaster to data frame for ggplot2
  raster_df <- as.data.frame(terra::as.data.frame(raster, xy=TRUE), na.rm=TRUE)
  names(raster_df) <- c("x", "y", "value")
  p <- ggplot() + geom_sf(data=map, color="black") +
    # Take the view window from ext so it follows the raster; the default ext
    # matches the previously hard-coded limits
    coord_sf(xlim=c(ext[1], ext[2]), ylim=c(ext[3], ext[4])) +
    geom_raster(data = raster_df, aes(x = x, y = y, fill = value), alpha=0.75) +
    scale_fill_gradient2(low = "red", mid="yellow", high = "green", name = "ECDF(FRic)",
                         limits = c(0, 1), breaks = c(0, 1), labels = c("0", "1"), midpoint=0.5) +
    theme_minimal() +
    labs(title = plot_label, subtitle="") +
    theme(plot.title = element_text(hjust = 0.5), # Center the title
          plot.subtitle = element_text(hjust=0.5), # Center the subtitle
          axis.text.x = element_blank(), # Remove x-axis text
          axis.ticks.x = element_blank(), # Remove x-axis ticks
          axis.title.x = element_text(margin = margin(t = 20)),
          axis.title.x.bottom = element_blank(), # Remove bottom x-axis title
          axis.title.y.left = element_blank()) # Remove left y-axis title
  return(p)
}