Skip to content

Commit

Permalink
Merge pull request #17 from kdorheim/weighted_mse
Browse files Browse the repository at this point in the history
Tried weighting MSEs
  • Loading branch information
ptrscll authored Jun 18, 2024
2 parents 69ee509 + ca3d48c commit fa4397a
Show file tree
Hide file tree
Showing 8 changed files with 233 additions and 24 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/run-r-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ jobs:

- name: Install R packages
run: |
Rscript -e 'install.packages(c("here", "assertthat"))' # Add any packages your script needs
Rscript -e 'install.packages(c("here", "assertthat", "dplyr"))' # Add any packages your script needs
- name: Run R script
run: |
Rscript tests/error_tests.R
Binary file added results/comparison_plots.jpeg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
39 changes: 39 additions & 0 deletions results/initial_norm_exp.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
parameters values
beta 0.732
q10_rh 2.64
diff 2.4

Objective Function Value: 0.00173

CO2 MSE: 58.5
T MSE: 0.0651
RMSE: 7.65

***Key Metrics***
TCRE: 1.53
TCR: 1.76

***Historical Warming and ERF***
GSAT Warming: 0.633
Ocean Heat Content Change: 423
Total Aerosol ERF: -1.24
WMGHG ERF: 3.63
Methane ERF: 0.538

***Future Warming***
scenario start end GSAT
ssp119 2021 2040 0.694
ssp119 2041 2060 0.842
ssp119 2081 2100 0.693
ssp126 2021 2040 0.709
ssp126 2041 2060 1.02
ssp126 2081 2100 1.05
ssp245 2021 2040 0.715
ssp245 2041 2060 1.23
ssp245 2081 2100 1.9
ssp370 2021 2040 0.725
ssp370 2041 2060 1.37
ssp370 2081 2100 2.86
ssp585 2021 2040 0.842
ssp585 2041 2060 1.68
ssp585 2081 2100 3.71
43 changes: 39 additions & 4 deletions scripts/error_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,19 @@ rmse <- function(x, y) {
return(sqrt(mse(x, y)))
}

# nmse - normalized mean squared error between two vectors: the MSE divided
#        by the sum of the squared observed values
#        (NMSE = mse(x, y) / sum(x^2))
#
# args:
#   x - observed values
#   y - predicted values
#
# returns: numeric vector length 1 containing the NMSE between predicted and
#          observed values
nmse <- function(x, y) {
  raw_mse <- mse(x, y)
  raw_mse / sum(x^2)
}


# get_var_mse: function to find MSE between observed and predicted data for
# a given variable
Expand All @@ -42,16 +55,17 @@ rmse <- function(x, y) {
# hector_data - data frame outputted by Hector
# var - variable name
# yrs - vector of years for finding MSE
# mse_fn - function to calculate mse with (default = mse)
#
# Returns: MSE between predicted and observed data for var
get_var_mse <- function(obs_data, hector_data, var, yrs, mse_fn = mse) {
  # Pulls the `value` column for variable `var` in years `yrs` from both data
  # frames and scores them with `mse_fn`, called as
  # mse_fn(observed, predicted). `mse_fn` defaults to mse, so callers that do
  # not pass it get the plain MSE.
  #
  # dplyr is referenced by namespace so the function works even when the
  # calling script has not attached dplyr before sourcing this file.
  #
  # NOTE(review): the two vectors are compared position by position, so this
  # presumably relies on both data frames listing the selected years in the
  # same order and without gaps - confirm against callers.
  obs_vec <-
    dplyr::filter(obs_data, year %in% yrs & variable == var)$value

  hector_vec <-
    dplyr::filter(hector_data, year %in% yrs & variable == var)$value

  mse_fn(obs_vec, hector_vec)
}

# mean_T_CO2_mse: function to find the mean of the temperature and CO2 MSEs
Expand All @@ -75,6 +89,28 @@ mean_T_CO2_mse <- function(obs_data, hector_data) {
return(mean(c(T_mse, CO2_mse)))
}

# mean_T_CO2_nmse: averages the normalized MSEs (NMSEs) of temperature and CO2
#                  concentration between observed data and Hector output
#
# args:
#   obs_data    - data frame of observed data formatted like Hector data frame
#   hector_data - data frame outputted by Hector
#
# Returns: mean of the temperature NMSE (years 1850-2014) and the CO2 NMSE
#          (year 1750 plus years 1850-2014)
mean_T_CO2_nmse <- function(obs_data, hector_data) {
  temp_yrs <- 1850:2014
  co2_yrs <- c(1750, temp_yrs)

  temp_nmse <- get_var_mse(obs_data = obs_data,
                           hector_data = hector_data,
                           var = GMST(),
                           yrs = temp_yrs,
                           mse_fn = nmse)

  co2_nmse <- get_var_mse(obs_data = obs_data,
                          hector_data = hector_data,
                          var = CONCENTRATIONS_CO2(),
                          yrs = co2_yrs,
                          mse_fn = nmse)

  mean(c(temp_nmse, co2_nmse))
}

# smooth_T_CO2_mse: function to find the mean of smoothed temperature and CO2
# MSEs between observed & predicted data for a given variable
#
Expand All @@ -97,7 +133,6 @@ smooth_T_CO2_mse <- function(obs_data, hector_data) {
CO2_mse <- get_var_mse(obs_data = obs_data,
hector_data = hector_data,
var = CONCENTRATIONS_CO2(),
yrs = c(1750, 1850:2014))

yrs = c(1750, 1850:2014))
return(mean(c(T_mse, CO2_mse)))
}
70 changes: 70 additions & 0 deletions scripts/graph_comparison_plots.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Script to plot observed temperature and CO2 against Hector runs that use
# different parameterizations (default parameters vs. the fit to NMSEs)
# Author: Peter Scully
# Date: 6/13/24

### Constants and Imports ###

# Importing libraries
library(hector)
library(ggplot2)

# Setting up file paths
COMP_DATA_DIR <- file.path(here::here(), "comparison_data")
SCRIPTS_DIR <- file.path(here::here(), "scripts")
RESULTS_DIR <- file.path(here::here(), "results")

CO2_PATH <- file.path(
  COMP_DATA_DIR,
  "Supplementary_Table_UoM_GHGConcentrations-1-1-0_annualmeans_v23March2017.csv"
)
TEMP_PATH <- file.path(
  COMP_DATA_DIR,
  "HadCRUT.5.0.2.0.analysis.summary_series.global.annual.csv"
)

INI_FILE <- system.file("input/hector_ssp245.ini", package = "hector")
PARAMS <- c(BETA(), Q10_RH(), DIFFUSIVITY())

OUTPUT <- file.path(RESULTS_DIR, "comparison_plots.jpeg")


# Provides get_co2_data, get_temp_data and run_hector used below
source(file.path(SCRIPTS_DIR, "major_functions.R"))

### Getting observational data ###
co2_data <- get_co2_data(CO2_PATH)
# The CO2 data gets lower/upper columns equal to the value itself so that it
# has the same columns as the temperature data when the two are row-bound.
co2_data$lower <- co2_data$value
co2_data$upper <- co2_data$value

# TRUE is spelled out because T is an ordinary variable that can be reassigned
temp_data <- get_temp_data(TEMP_PATH, include_unc = TRUE)
# dplyr::filter is namespace-qualified so this does not depend on a sourced
# file having attached dplyr
temp_data <- dplyr::filter(temp_data, year <= 2014)

obs_data <- rbind(co2_data, temp_data)

### Running Hector ###
# Run with Hector's default parameter values
default_data <- run_hector(ini_file = INI_FILE,
                           params = NULL,
                           vals = NULL,
                           yrs = 1750:2014,
                           vars = c(GMST(), CONCENTRATIONS_CO2()))
default_data$scenario <- "Hector - Default Fit"

# Run with values for PARAMS (beta, q10_rh, diffusivity); these match the
# parameters reported in results/initial_norm_exp.txt
nmse_data <- run_hector(ini_file = INI_FILE,
                        params = PARAMS,
                        vals = c(0.732, 2.64, 2.4),
                        yrs = 1750:2014,
                        vars = c(GMST(), CONCENTRATIONS_CO2()))
nmse_data$scenario <- "Hector - Fit to NMSEs"

hector_data <- rbind(default_data, nmse_data)
# Hector output gets lower/upper columns equal to the value, as for CO2 above
hector_data$lower <- hector_data$value
hector_data$upper <- hector_data$value

comb_data <- rbind(obs_data, hector_data)

### Plotting ###
# The ribbon is drawn only for the observed ("historical") temperature rows,
# using their lower/upper bounds; every scenario gets a line.
ggplot(data = comb_data, aes(x = year, y = value, color = scenario)) +
  geom_ribbon(
    data = dplyr::filter(comb_data,
                         scenario == "historical" & variable == GMST()),
    aes(ymin = lower, ymax = upper),
    fill = "aquamarine1",
    color = NA
  ) +
  geom_line() +
  facet_wrap(~ variable, scales = "free") +
  ggtitle("Comparing Parameterizations")
ggsave(OUTPUT, width = 15)
69 changes: 69 additions & 0 deletions scripts/initial_norm_exp.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Script to try normalizing the T and CO2 MSEs: optimizes Hector parameters
# against the mean of the temperature and CO2 NMSEs and writes the results
# Author: Peter Scully
# Date: 6/13/24

### Constants and Imports ###

# Importing libraries
library(hector)

# Setting up file paths
COMP_DATA_DIR <- file.path(here::here(), "comparison_data")
SCRIPTS_DIR <- file.path(here::here(), "scripts")
RESULTS_DIR <- file.path(here::here(), "results")

CO2_PATH <- file.path(
  COMP_DATA_DIR,
  "Supplementary_Table_UoM_GHGConcentrations-1-1-0_annualmeans_v23March2017.csv"
)
TEMP_PATH <- file.path(
  COMP_DATA_DIR,
  "HadCRUT.5.0.2.0.analysis.summary_series.global.annual.csv"
)

INI_FILE <- system.file("input/hector_ssp245.ini", package = "hector")
PARAMS <- c(BETA(), Q10_RH(), DIFFUSIVITY())

OUTPUT <- file.path(RESULTS_DIR, "initial_norm_exp.txt")


# Provides the helper functions used below (get_co2_data, get_temp_data,
# run_optim, run_hector, write_metric, calc_table_metrics, ...)
source(file.path(SCRIPTS_DIR, "major_functions.R"))

### Getting observational data ###
co2_data <- get_co2_data(CO2_PATH)
temp_data <- get_temp_data(TEMP_PATH)
obs_data <- rbind(co2_data, temp_data)

### Calling optim ###
# lower/upper give one bound per entry of PARAMS, in the same order
best_pars <- run_optim(obs_data = obs_data,
                       ini_file = INI_FILE,
                       params = PARAMS,
                       lower = c(0.5 - 0.232, 2.2 - 0.44, 2.3 - 0.1),
                       upper = c(0.5 + 0.232, 2.2 + 0.44, 2.3 + 0.1),
                       yrs = 1750:2014,
                       vars = c(GMST(), CONCENTRATIONS_CO2()),
                       error_fn = mean_T_CO2_nmse,
                       method = "L-BFGS-B",
                       output_file = OUTPUT)

### Outputting individual MSEs ###
# Re-run Hector with the optimized parameters to report the un-normalized MSEs
hector_data <- run_hector(ini_file = INI_FILE,
                          params = PARAMS,
                          vals = best_pars,
                          yrs = 1750:2014,
                          vars = c(GMST(), CONCENTRATIONS_CO2()))

T_mse <- get_var_mse(obs_data = obs_data,
                     hector_data = hector_data,
                     var = GMST(),
                     yrs = 1850:2014)
CO2_mse <- get_var_mse(obs_data = obs_data,
                       hector_data = hector_data,
                       var = CONCENTRATIONS_CO2(),
                       yrs = c(1750, 1850:2014))

write_metric("CO2 MSE:", CO2_mse, OUTPUT)
write_metric("T MSE: ", T_mse, OUTPUT)
# RMSE here is the square root of the mean of the two MSEs (not 100% sure this
# is how we want to calculate this). The MSEs must be combined with c():
# mean(CO2_mse, T_mse) would pass T_mse as mean()'s `trim` argument and
# return CO2_mse alone.
write_metric("RMSE: ", sqrt(mean(c(CO2_mse, T_mse))), OUTPUT)
write("", OUTPUT, append = TRUE)

### Outputting table metrics ###
calc_table_metrics(PARAMS, best_pars, OUTPUT)
14 changes: 7 additions & 7 deletions scripts/major_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,12 @@ get_co2_data <- function(file, scenario = "historical") {
# Fixing table formatting
co2_data <- na.omit(co2_data)
colnames(co2_data) <- c("year", "value")

# Adding in new columns to match Hector data frames
co2_data$scenario <- scenario
co2_data$variable <- CONCENTRATIONS_CO2()
co2_data$units <- " ppmv CO2"

return(co2_data)
}

Expand All @@ -74,18 +74,18 @@ get_co2_data <- function(file, scenario = "historical") {
get_temp_data <- function(file, scenario = "historical", include_unc = FALSE) {
  # Reads the temperature CSV at `file` and reshapes it to look like a Hector
  # output data frame (year, value, scenario, variable, units). When
  # include_unc is TRUE the lower/upper columns are kept as well.
  #
  # The default is written as FALSE rather than F because F is an ordinary
  # variable that can be reassigned; the default value itself is unchanged.
  temp_data <- read.csv(file)

  # NOTE(review): assumes the CSV has exactly these four columns in this
  # order - confirm against the data file
  colnames(temp_data) <- c("year", "value", "lower", "upper")

  # Adding in new columns to match Hector data frames
  temp_data$scenario <- scenario
  temp_data$variable <- GMST()
  temp_data$units <- "degC"

  # Removing upper and lower bounds, if desired
  if (!include_unc) {
    temp_data$lower <- NULL
    temp_data$upper <- NULL
  }

  return(temp_data)
}

Expand Down Expand Up @@ -407,7 +407,7 @@ run_optim <- function(obs_data, ini_file, params, par = NULL, sd = NULL,
#
# returns: Nothing, but outputs table to given output file
calc_table_metrics <- function(params, vals, output_file) {

### KEY METRICS ###

## Finding TCRE ##
Expand Down Expand Up @@ -625,4 +625,4 @@ calc_table_metrics <- function(params, vals, output_file) {
quote = FALSE,
sep = "\t",
row.names = FALSE)
}
}
20 changes: 8 additions & 12 deletions tests/error_tests.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
source(here::here("scripts", "error_functions.R"))

library(assertthat)
library(dplyr)

# Confirming that mse works as expected
mse_tests <- function() {
Expand Down Expand Up @@ -35,34 +36,29 @@ get_var_mse_tests <- function() {
assert_that(get_var_mse(obs_data,
hect_data,
var = "CO2_concentrations",
start = 2000,
end = 2015) == 0)
yrs = 2000:2015) == 0)

assert_that(get_var_mse(obs_data,
hect_data,
var = "CO2_concentrations",
start = 2000,
end = 2000) == 0)
yrs = 2000) == 0)

assert_that(get_var_mse(obs_data,
hect_data,
var = "GMST",
start = 2000,
end = 2000) == 1)
yrs = 2000) == 1)

assert_that(get_var_mse(obs_data,
hect_data,
var = "GMST",
start = 2000,
end = 2001) == 0.5)
yrs = 2000:2001) == 0.5)

assert_that(get_var_mse(obs_data,
hect_data,
var = "GMST",
start = 2000,
end = 2004) == 3)
yrs = 2000:2004) == 3)


}

mse_tests()
mse_tests()
get_var_mse_tests()

0 comments on commit fa4397a

Please sign in to comment.