diff --git a/CHANGELOG.md b/CHANGELOG.md index e1288e91..d18981b5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # openproblems.bio unreleased +## MAJOR CHANGES + +* Migrated the result scaling from R to JavaScript to allow dynamically updating the results (PR #332). + ## MINOR CHANGES * Improve Equations visualisation (PR #329). diff --git a/results/_include/_load_data.qmd b/results/_include/_load_data.qmd index 53049052..0854315f 100644 --- a/results/_include/_load_data.qmd +++ b/results/_include/_load_data.qmd @@ -55,6 +55,15 @@ split_cite_fun <- function(keys) { aggregate_scores <- function(scaled_score) { mean(pmin(1, pmax(0, scaled_score)) %|% 0) } + + +ojs_define( + task_info = jsonlite::read_json(paste0(params$data_dir, "/task_info.json")), + dataset_info = jsonlite::read_json(paste0(params$data_dir, "/dataset_info.json")), + method_info = jsonlite::read_json(paste0(params$data_dir, "/method_info.json")), + metric_info = jsonlite::read_json(paste0(params$data_dir, "/metric_info.json")), + results = jsonlite::read_json(paste0(params$data_dir, "/results.json")) +) ``` diff --git a/results/_include/_summary_figure.qmd b/results/_include/_summary_figure.qmd index 283d3b1b..b49a2a93 100644 --- a/results/_include/_summary_figure.qmd +++ b/results/_include/_summary_figure.qmd @@ -1,221 +1,192 @@ -```{r funkyheatmap_data} -#| echo: false -#| message: false -#| warning: false - - -label_time <- function(time) { - case_when( - time < 1e-5 ~ "0s", - time < 1 ~ "<1s", - time < 60 ~ paste0(floor(time), "s"), - time < 3600 ~ paste0(floor(time / 60), "m"), - time < 3600 * 24 ~ paste0(floor(time / 3600), "h"), - time < 3600 * 24 * 7 ~ paste0(floor(time / 3600 / 24), "d"), - !is.na(time) ~ ">7d", - TRUE ~ NA_character_ +```{ojs} +//| echo: false +poss_dataset_ids = dataset_info + .map(d => d.dataset_id) + .filter(d => results.map(r => r.dataset_id).includes(d)) +poss_method_ids = method_info + .map(d => d.method_id) + .filter(d => results.map(r => r.method_id).includes(d)) +poss_metric_ids = metric_info + .map(d => d.metric_id) + .filter(d => results.map(r => Object.keys(r.scaled_scores)).flat().includes(d)) +``` + + +```{ojs} +//| echo: false +//| message: false +//| warning: false +results_long = results.flatMap(d => { + return Object.entries(d.scaled_scores).map(([metric_id, value]) => + ({ + method_id: d.method_id, + dataset_id: d.dataset_id, + metric_id: metric_id, + score: value + }) ) +}).filter(d => method_ids.includes(d.method_id) && metric_ids.includes(d.metric_id) && dataset_ids.includes(d.dataset_id)) + +results_resources = results.flatMap(d => { + return ({ + method_id: d.method_id, + dataset_id: d.dataset_id, + ...d.resources + }) +}) + +function label_time(time) { + if (time < 1e-5) return "0s"; + if (time < 1) return "<1s"; + if (time < 60) return `${Math.floor(time)}s`; + if (time < 3600) return `${Math.floor(time / 60)}m`; + if (time < 3600 * 24) return `${Math.floor(time / 3600)}h`; + if (time < 3600 * 24 * 7) return `${Math.floor(time / 3600 / 24)}d`; + return ">7d"; // Assuming missing values are encoded as NaN } -label_memory <- function(x, include_mb = FALSE) { - case_when( - x < 1e9 ~ "<1G", - x < 1e12 ~ paste0(round(x / 1e9), "G"), - !is.na(x) ~ ">1T", - TRUE ~ NA_character_ - ) +function label_memory(x_mb, include_mb = true) { + if (!include_mb && x_mb < 1e3) return "<1G"; + if (x_mb < 1) return "<1M"; + if (x_mb < 1e3) return `${Math.round(x_mb)}M`; + if (x_mb < 1e6) return `${Math.round(x_mb / 1e3)}G`; + if (x_mb < 1e9) return `${Math.round(x_mb / 1e6)}T`; + return ">1P"; } -overall <- results_long %>% - group_by(method_id) %>% - summarise(mean_score = aggregate_scores(score), .groups = "drop") %>% - arrange(mean_score) -per_dataset <- results_long %>% - group_by(method_id, dataset_id) %>% - summarise(score = aggregate_scores(score), .groups = "drop") %>% - mutate(dataset_id = paste0("dataset_", dataset_id)) %>% - spread(dataset_id, score) -per_metric <- results_long %>% - group_by(method_id, metric_id) %>% - summarise(score = aggregate_scores(score), .groups = "drop") %>% - mutate(metric_id = paste0("metric_", metric_id)) %>% - spread(metric_id, score) - -results_resources <- results %>% - select(method_id, dataset_id, resources) %>% - unnest(resources) - -# exit code is missing from openproblems-v1 -if (! "exit_code" %in% colnames(results_resources)) { - results_resources <- results_resources %>% - mutate(exit_code = ifelse(is.na(duration_sec), 1L, 0L)) +function aggregate_scores(obj) { + return d3.mean(obj.map(val => { + if (val.score === undefined || isNaN(val.score)) return 0; + return Math.min(1, Math.max(0, val.score)) + })); } -resources <- results_resources %>% - group_by(method_id) %>% - summarise( - error_pct_oom = mean(exit_code %|% 0 %in% c(137)), - error_pct_timeout = mean(exit_code %|% 0 %in% c(143)), - error_pct_error = mean(exit_code %|% 0 != 0) - error_pct_oom - error_pct_timeout, - error_pct_ok = 1 - error_pct_oom - error_pct_timeout - error_pct_error, - error_reason = list(c( - "Memory limit exceeded" = error_pct_oom, - "Time limit exceeded" = error_pct_timeout, - "Execution error" = error_pct_error, - "No error" = error_pct_ok - )), - mean_cpu_pct = mean(cpu_pct, na.rm = TRUE), - mean_peak_memory_b = mean(peak_memory_mb, na.rm = TRUE) * 1000, - mean_peak_memory_log = -log10(mean_peak_memory_b), - mean_peak_memory_str = label_memory(mean_peak_memory_b * 1000), - mean_disk_read_b = mean(disk_read_mb, na.rm = TRUE) * 1000, - mean_disk_read_log = -log10(mean_disk_read_b), - mean_disk_read_str = label_memory(mean_disk_read_b * 1000), - mean_disk_write_mb = mean(disk_write_mb, na.rm = TRUE) * 1000, - mean_disk_write_log = -log10(mean_disk_write_mb), - mean_disk_write_str = label_memory(mean_disk_write_mb * 1000), - mean_duration_sec = mean(duration_sec %|% 0), - mean_duration_log = -log10(mean_duration_sec), - mean_duration_str = label_time(mean_duration_sec), - .groups = "drop" - ) %>% - mutate_at(vars(ends_with("_str")), function(x) paste0(" ", x, " ")) - -summary_all <- - method_info %>% - filter(!is_baseline) %>% - select(method_id, method_name) %>% - inner_join(overall, by = "method_id") %>% - left_join(per_dataset, by = "method_id") %>% - left_join(per_metric, by = "method_id") %>% - left_join(resources, by = "method_id") %>% - arrange(desc(method_id)) - -column_info <- - bind_rows( - tribble( - ~id, ~name, ~group, ~geom, ~palette, - "method_name", "Name", "method", "text", NA_character_, - "mean_score", "Score", "overall", "bar", "overall", - "error_reason", "Error reason", "overall", "pie", "error_reason" - ), - dataset_info %>% transmute( - id = paste0("dataset_", dataset_id), - name = dataset_name, - group = "dataset", - geom = "funkyrect", - palette = "dataset" - ), - metric_info %>% transmute( - id = paste0("metric_", metric_id), - name = metric_name, - group = "metric", - geom = "funkyrect", - palette = "metric" - ), - tribble( - ~id, ~name, ~label, ~geom, - "mean_cpu_pct", "%CPU", NA_character_, "funkyrect", - "mean_peak_memory_log", "Peak memory", "mean_peak_memory_str", "rect", - "mean_disk_read_log", "Disk read", "mean_disk_read_str", "rect", - "mean_disk_write_log", "Disk write", "mean_disk_write_str", "rect", - "mean_duration_log", "Duration", "mean_duration_str", "rect" - ) %>% mutate( - group = "resources", - palette = "resources" - ) - ) %>% - mutate( - options = map2(id, geom, function(id, geom) { - if (id == "method_name") { - list(width = 15, hjust = 0) - } else if (id == "is_baseline") { - list(width = 1) - } else if (geom == "bar") { - list(width = 4) - } else { - list() - } - } - ) -) -column_groups <- tribble( - ~group, ~palette, ~level1, - "method", NA_character_, "", - "overall", "overall", "Overall", - "error_reason", "error_reason", "Error reason" -) -if (nrow(dataset_info) >= 3) { - column_groups <- column_groups %>% - add_row(level1 = "Datasets", group = "dataset", palette = "dataset") -} else { - column_groups <- column_groups %>% - add_row(level1 = "", group = "dataset", palette = NA_character_) +function mean_na_rm(x) { + return d3.mean(x.filter(d => !isNaN(d))); } -if (nrow(metric_info) >= 3) { - column_groups <- column_groups %>% - add_row(level1 = "Metrics", group = "metric", palette = "metric") -} else { - column_groups <- column_groups %>% - add_row(level1 = "", group = "metric", palette = NA_character_) + +function transpose_list_of_objects(list) { + return Object.fromEntries(Object.keys(list[0]).map(key => [key, list.map(d => d[key])])) } -column_groups <- column_groups %>% - add_row(level1 = "Resources", group = "resources", palette = "resources") - -palettes <- list( - overall = "Greys", - dataset = "Blues", - metric = "Reds", - resources = "YlOrBr", - error_reason = list( - colors = c("#8DD3C7", "#FFFFB3", "#BEBADA", "#FFFFFF"), - names = c("Memory limit exceeded", "Time limit exceeded", "Execution error", "No error") - ) -) -# g_all <- funky_heatmap( -# data = summary_all, -# column_info = column_info %>% filter(id %in% colnames(summary_all)), -# column_groups = column_groups, -# palettes = palettes, -# position_args = position_arguments( -# # determine xmax expand heuristically -# expand_xmax = max(str_length(tail(column_info$name, 4))) / 5, -# # determine offset heuristically -# col_annot_offset = max(str_length(column_info$name)) / 5 -# ), -# add_abc = FALSE, -# scale_column = FALSE -# ) - -ojs_define( - method_info = method_info, - funky_heatmap_args = list( - data = summary_all, - columns = intersect(column_info$id, colnames(summary_all)), - column_info = column_info %>% filter(id %in% colnames(summary_all)), - column_groups = column_groups, - palettes = palettes, - expand = c(xmax = 3), - col_annot_offset = 5, - add_abc = FALSE, - scale_column = FALSE - ) -) -``` +overall = d3.groups(results_long, d => d.method_id) + .map(([method_id, values]) => ({method_id, mean_score: aggregate_scores(values)})) -```{ojs} -console.log(funky_heatmap_args.data) -``` +per_dataset = d3.groups(results_long, d => d.method_id) + .map(([method_id, values]) => { + const datasets = d3.groups(values, d => d.dataset_id) + .map(([dataset_id, values]) => ({["dataset_" + dataset_id]: aggregate_scores(values)})) + .reduce((a, b) => ({...a, ...b}), {}) + return {method_id, ...datasets} + }) +per_metric = d3.groups(results_long, d => d.method_id) + .map(([method_id, values]) => { + const metrics = d3.groups(values, d => d.metric_id) + .map(([metric_id, values]) => ({["metric_" + metric_id]: aggregate_scores(values)})) + .reduce((a, b) => ({...a, ...b}), {}) + return {method_id, ...metrics} + }) -```{ojs} -//| echo: false -//| panel: input -//| layout-ncol: 2 -viewof color_by_rank = Inputs.toggle({label: "Color by rank", value: true}) -viewof scale_column = Inputs.toggle({label: "Minmax column", value: false}) +resources = d3.groups(results_resources, d => d.method_id) + .map(([method_id, values]) => { + const error_pct_oom = d3.mean(values, d => d.exit_code === 137) + const error_pct_timeout = d3.mean(values, d => d.exit_code === 143) + const error_pct_error = d3.mean(values, d => d.exit_code > 0) - error_pct_oom - error_pct_timeout + const error_pct_ok = 1 - error_pct_oom - error_pct_timeout - error_pct_error + const mean_peak_memory_mb = mean_na_rm(values.map(d => d.peak_memory_mb)) + const mean_disk_read_mb = mean_na_rm(values.map(d => d.disk_read_mb)) + const mean_disk_write_mb = mean_na_rm(values.map(d => d.disk_write_mb)) + const mean_duration_sec = mean_na_rm(values.map(d => d.duration_sec)) + return ({ + method_id, + error_pct_error, + error_pct_oom, + error_pct_timeout, + error_pct_ok, + // error_reason: { + // "Memory limit exceeded": error_pct_oom, + // "Time limit exceeded": error_pct_timeout, + // "Execution error": error_pct_error, + // "No error": error_pct_ok + // }, + error_reason: [error_pct_oom, error_pct_timeout, error_pct_error, error_pct_ok], + mean_cpu_pct: mean_na_rm(values.map(d => d.cpu_pct)), + mean_peak_memory_mb, + mean_peak_memory_log: -Math.log10(mean_peak_memory_mb), + mean_peak_memory_str: " " + label_memory(mean_peak_memory_mb) + " ", + mean_disk_read_mb: mean_na_rm(values.map(d => d.disk_read_mb)), + mean_disk_read_log: -Math.log10(mean_disk_read_mb), + mean_disk_read_str: " " + label_memory(mean_disk_read_mb) + " ", + mean_disk_write_mb: mean_na_rm(values.map(d => d.disk_write_mb)), + mean_disk_write_log: -Math.log10(mean_disk_write_mb), + mean_disk_write_str: " " + label_memory(mean_disk_write_mb) + " ", + mean_duration_sec, + mean_duration_log: -Math.log10(mean_duration_sec), + mean_duration_str: " " + label_time(mean_duration_sec) + " " + }) + }) + +summary_all = method_info + .filter(d => show_con || !d.is_baseline) + .filter(d => method_ids.includes(d.method_id)) + .map(method => { + const method_id = method.method_id + const method_name = method.method_name + const mean_score = overall.find(d => d.method_id === method_id).mean_score + const datasets = per_dataset.find(d => d.method_id === method_id) + const metrics = per_metric.find(d => d.method_id === method_id) + const resources_ = resources.find(d => d.method_id === method_id) + return {method_id, method_name, mean_score, ...datasets, ...metrics, ...resources_} + }) + .sort((a, b) => b.mean_score - a.mean_score) + +// make sure the first entry contains all columns +column_info = [ + {id: "method_name", name: "Name", label: null, group: "method", geom: "text", palette: null}, + {id: "mean_score", name: "Score", group: "overall", geom: "bar", palette: "overall"}, + {id: "error_reason", name: "Error reason", group: "overall", geom: "pie", palette: "error_reason"}, + ...dataset_info.filter(d => dataset_ids.includes(d.dataset_id)).map(d => ({id: "dataset_" + d.dataset_id, name: d.dataset_name, group: "dataset", geom: "funkyrect", palette: "dataset"})), + ...metric_info.filter(d => metric_ids.includes(d.metric_id)).map(d => ({id: "metric_" + d.metric_id, name: d.metric_name, group: "metric", geom: "funkyrect", palette: "metric"})), + {id: "mean_cpu_pct", name: "%CPU", group: "resources", geom: "funkyrect", palette: "resources"}, + {id: "mean_peak_memory_log", name: "Peak memory", label: "mean_peak_memory_str", group: "resources", geom: "rect", palette: "resources"}, + {id: "mean_disk_read_log", name: "Disk read", label: "mean_disk_read_str", group: "resources", geom: "rect", palette: "resources"}, + {id: "mean_disk_write_log", name: "Disk write", label: "mean_disk_write_str", group: "resources", geom: "rect", palette: "resources"}, + {id: "mean_duration_log", name: "Duration", label: "mean_duration_str", group: "resources", geom: "rect", palette: "resources"} +].map(d => { + if (d.id === "method_name") { + return {...d, options: {width: 15, hjust: 0}} + } else if (d.id === "is_baseline") { + return {...d, options: {width: 1}} + } else if (d.geom === "bar") { + return {...d, options: {width: 4}} + } else { + return d + } +}) + +column_groups = [ + {group: "method", palette: null, level1: ""}, + {group: "overall", palette: "overall", level1: "Overall"}, + {group: "error_reason", palette: "error_reason", level1: "Error reason"}, + {group: "dataset", palette: "dataset", level1: dataset_info.length >= 3 ? "Datasets" : ""}, + {group: "metric", palette: "metric", level1: metric_info.length >= 3 ? "Metrics" : ""}, + {group: "resources", palette: "resources", level1: "Resources"} +] + +palettes = [ + { + overall: "Greys", + dataset: "Blues", + metric: "Reds", + resources: "YlOrBr", + error_reason: { + colors: ["#8DD3C7", "#FFFFB3", "#BEBADA", "#FFFFFF"], + names: ["Memory limit exceeded", "Time limit exceeded", "Execution error", "No error"] + } + } +][0] ``` ```{ojs} @@ -223,12 +194,12 @@ viewof scale_column = Inputs.toggle({label: "Minmax column", value: false}) //| fig-cap: "Overview of the results per method. This figures shows the mean of the scaled scores (group Overall), the mean scores per dataset (group Dataset) and the mean scores per metric (group Metric)." //| column: page funkyheatmap( - funky_heatmap_args.data, - funky_heatmap_args.column_info, + transpose_list_of_objects(summary_all), + transpose_list_of_objects(column_info), [], - funky_heatmap_args.column_groups, + transpose_list_of_objects(column_groups), [], - funky_heatmap_args.palettes, + palettes, { fontSize: 14, rowHeight: 26, @@ -247,6 +218,72 @@ funkyheatmap( ); ``` + +