From 0d7e464f2db1b43e2f5da26c481bb0cb6ebd8b42 Mon Sep 17 00:00:00 2001 From: prockenschaub Date: Wed, 26 Apr 2023 07:50:18 +0200 Subject: [PATCH 01/38] define SIC data structure --- inst/extdata/config/data-sources.json | 392 ++++++++++++++++++++++++++ 1 file changed, 392 insertions(+) diff --git a/inst/extdata/config/data-sources.json b/inst/extdata/config/data-sources.json index a7ceea6e..12e66608 100644 --- a/inst/extdata/config/data-sources.json +++ b/inst/extdata/config/data-sources.json @@ -9385,5 +9385,397 @@ } } } + }, + { + "name": "sic", + "url": "https://physionet.org/content/sicdb/1.0.5/", + "id_cfg": { + "patient": { + "id": "patientid", + "position": 1, + "start": "firstadmission", + "end": "offsetofdeath", + "table": "cases" + }, + "icustay": { + "id": "caseid", + "position": 2, + "start": "offsetafterfirstadmission", + "end": "timeofstay", + "table": "cases" + } + }, + "tables": { + "cases": { + "files": "cases.csv.gz", + "defaults": { + "index_var": "offsetafterfirstadmission", + "time_vars": ["offsetafterfirstadmission"] + }, + "cols": { + "caseid": { + "name": "CaseID", + "spec": "col_integer" + }, + "patientid": { + "name": "PatientID", + "spec": "col_integer" + }, + "admissionyear": { + "name": "AdmissionYear", + "spec": "col_integer" + }, + "timeofstay": { + "name": "TimeOfStay", + "spec": "col_integer" + }, + "saps3": { + "name": "saps3", + "spec": "col_double" + }, + "hospitaldischargetype": { + "name": "HospitalDischargeType", + "spec": "col_integer" + }, + "dischargestate": { + "name": "DischargeState", + "spec": "col_integer" + }, + "dischargeunit": { + "name": "DischargeUnit", + "spec": "col_integer" + }, + "offsetofdeath": { + "name": "OffsetOfDeath", + "spec": "col_integer" + }, + "estimatedsurvivalobservationtime": { + "name": "EstimatedSurvivalObservationTime", + "spec": "col_integer" + }, + "sex": { + "name": "Sex", + "spec": "col_integer" + }, + "weightonadmission": { + "name": "WeightOnAdmission", + "spec": "col_double" + }, + "heightonadmission": { + "name": "HeightOnAdmission", + "spec": "col_double" + }, + "ageonadmission": { + "name": "AgeOnAdmission", + "spec": "col_integer" + }, + "hospitalunit": { + "name": "HospitalUnit", + "spec": "col_integer" + }, + "referringunit": { + "name": "ReferringUnit", + "spec": "col_integer" + }, + "icd10main": { + "name": "ICD10Main", + "spec": "col_character" + }, + "icd10maintext": { + "name": "ICD10MainText", + "spec": "col_character" + }, + "diagnosist2": { + "name": "DiagnosisT2", + "spec": "col_character" + }, + "surgicalsite": { + "name": "SurgicalSite", + "spec": "col_integer" + }, + "hoursofcrrt": { + "name": "HoursOfCRRT", + "spec": "col_integer" + }, + "admissionformhassepsis": { + "name": "AdmissionFormHasSepsis", + "spec": "col_integer" + }, + "orbisdataavailable": { + "name": "OrbisDataAvailable", + "spec": "col_character" + }, + "heartsurgeryadditionaldata": { + "name": "HeartSurgeryAdditionalData", + "spec": "col_integer" + }, + "heartsurgerycrossclamptime": { + "name": "HeartSurgeryCrossClampTime", + "spec": "col_integer" + }, + "heartsurgerybeginoffset": { + "name": "HeartSurgeryBeginOffset", + "spec": "col_integer" + }, + "heartsurgeryendoffset": { + "name": "HeartSurgeryEndOffset", + "spec": "col_integer" + }, + "offsetafterfirstadmission": { + "name": "OffsetAfterFirstAdmission", + "spec": "col_integer" + } + } + }, + "d_references": { + "files": "d_references.csv.gz", + "cols": { + "referenceglobalid": { + "name": "ReferenceGlobalID", + "spec": "col_integer" + }, + "referencevalue": { + "name": "ReferenceValue", + "spec": "col_character" + }, + "referencename": { + "name": "ReferenceName", + "spec": "col_character" + }, + "referencedescription": { + "name": "ReferenceDescription", + "spec": "col_character" + }, + "referenceunit": { + "name": "ReferenceUnit", + "spec": "col_character" + }, + "referenceorder": { + "name": "ReferenceOrder", + "spec": "col_integer" + }, + "referencetype": { + "name": "ReferenceType", + "spec": "col_integer" + }, + "data": { + "name": "Data", + "spec": "col_character" + } + } + }, + "data_float_h": { + "files": "data_float_h.csv.gz", + "defaults": { + "index_var": "offset", + "time_vars": ["offset"] + }, + "cols": { + "caseid": { + "name": "CaseID", + "spec": "col_integer" + }, + "dataid": { + "name": "DataID", + "spec": "col_integer" + }, + "offset": { + "name": "Offset", + "spec": "col_integer" + }, + "val": { + "name": "Val", + "spec": "col_double" + }, + "cnt": { + "name": "cnt", + "spec": "col_integer" + }, + "rawdata": { + "name": "rawdata", + "spec": "col_character" + } + }, + "partitioning": { + "col": "dataid", + "breaks": [702, 703, 705, 708, 709, 710, 715, 717, 719, 724, 725, + 731, 773, 2018, 2274, 2278, 2280, 2283, 2290, 3056, 3059, 3071] + } + }, + "data_ref": { + "files": "data_ref.csv.gz", + "defaults": { + "index_var": "offsetafterfirstadmission", + "time_vars": ["offsetafterfirstadmission"] + }, + "cols": { + "id": { + "name": "id", + "spec": "col_integer" + }, + "caseid": { + "name": "CaseID", + "spec": "col_integer" + }, + "refid": { + "name": "RefID", + "spec": "col_integer" + }, + "customfieldid": { + "name": "CustomFieldID", + "spec": "col_integer" + } + } + }, + "laboratory": { + "files": "laboratory.csv.gz", + "defaults": { + "index_var": "offset", + "val_var": "laboratoryvalue", + "time_vars": ["offset"] + }, + "cols": { + "id": { + "name": "id", + "spec": "col_integer" + }, + "caseid": { + "name": "CaseID", + "spec": "col_integer" + }, + "laboratoryid": { + "name": "LaboratoryID", + "spec": "col_integer" + }, + "offset": { + "name": "Offset", + "spec": "col_integer" + }, + "laboratoryvalue": { + "name": "LaboratoryValue", + "spec": "col_double" + }, + "laboratorytype": { + "name": "LaboratoryType", + "spec": "col_integer" + } + } + }, + "medication": { + "files": "medication.csv.gz", + "defaults": { + "index_var": "offset", + "time_vars": ["offset", "offsetdrugend"] + }, + "cols": { + "id": { + "name": "id", + "spec": "col_integer" + }, + "caseid": { + "name": "CaseID", + "spec": "col_integer" + }, + "patientid": { + "name": "PatientID", + "spec": "col_integer" + }, + "drugid": { + "name": "DrugID", + "spec": "col_integer" + }, + "offset": { + "name": "Offset", + "spec": "col_integer" + }, + "offsetdrugend": { + "name": "OffsetDrugEnd", + "spec": "col_integer" + }, + "issingledose": { + "name": "IsSingleDose", + "spec": "col_logical" + }, + "amount": { + "name": "Amount", + "spec": "col_double" + }, + "amountperminute": { + "name": "AmountPerMinute", + "spec": "col_double" + }, + "givenstate": { + "name": "GivenState", + "spec": "col_integer" + } + } + }, + "data_range": { + "files": "data_range.csv.gz", + "defaults": { + "index_var": "offset", + "time_vars": ["offset", "offsetend"] + }, + "cols": { + "id": { + "name": "id", + "spec": "col_integer" + }, + "caseid": { + "name": "CaseID", + "spec": "col_integer" + }, + "dataid": { + "name": "DataID", + "spec": "col_integer" + }, + "offset": { + "name": "Offset", + "spec": "col_integer" + }, + "offsetend": { + "name": "OffsetEnd", + "spec": "col_integer" + }, + "data": { + "name": "Data", + "spec": "col_character" + } + } + }, + "unitlog": { + "files": "unitlog.csv.gz", + "defaults": { + "index_var": "offset", + "time_vars": ["offset", "offsetend"] + }, + "cols": { + "id": { + "name": "id", + "spec": "col_integer" + }, + "caseid": { + "name": "CaseID", + "spec": "col_integer" + }, + "patientid": { + "name": "PatientID", + "spec": "col_integer" + }, + "logstate": { + "name": "LogState", + "spec": "col_integer" + }, + "offset": { + "name": "Offset", + "spec": "col_integer" + }, + "hospitalunit": { + "name": "HospitalUnit", + "spec": "col_integer" + } + } + } + } } ] From 9ffcb4d3ffcc75b68445556b7d2bab10ca9d8cd2 Mon Sep 17 00:00:00 2001 From: prockenschaub Date: Wed, 26 Apr 2023 07:50:38 +0200 Subject: [PATCH 02/38] add SIC to auto attach --- R/utils-file.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/utils-file.R b/R/utils-file.R index a970f3f5..3f961d62 100644 --- a/R/utils-file.R +++ b/R/utils-file.R @@ -194,7 +194,7 @@ auto_attach_srcs <- function() { res <- sys_env("RICU_SRC_LOAD", unset = NA_character_) if (is.na(res)) { - c("mimic", "mimic_demo", "eicu", "eicu_demo", "hirid", "aumc", "miiv") + c("mimic", "mimic_demo", "eicu", "eicu_demo", "hirid", "aumc", "miiv", "sic") } else { strsplit(res, ",")[[1L]] } From 2884e3f0e55b95fd5af6d7f39564e18a05a96f78 Mon Sep 17 00:00:00 2001 From: prockenschaub Date: Wed, 26 Apr 2023 07:50:55 +0200 Subject: [PATCH 03/38] add SIC loading helpers --- R/data-load.R | 11 +++++++++++ R/data-utils.R | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++ R/utils-misc.R | 2 ++ 3 files changed, 65 insertions(+) diff --git a/R/data-load.R b/R/data-load.R index 711f6d43..316aeb22 100644 --- a/R/data-load.R +++ b/R/data-load.R @@ -168,6 +168,17 @@ load_difftime.miiv_tbl <- function(x, rows, cols = colnames(x), load_mihi(x, {{ rows }}, cols, id_hint, time_vars) } +#' @rdname load_src +#' @export +load_difftime.sic_tbl <- function(x, rows, cols = colnames(x), + id_hint = id_vars(x), + time_vars = ricu::time_vars(x), ...) { + + warn_dots(...) + # TODO: consider renaming fun to reflect its use for SICdb + load_eiau(x, {{ rows }}, cols, id_hint, time_vars, s_as_mins) +} + #' @rdname load_src #' @export load_difftime.character <- function(x, src, ...) { diff --git a/R/data-utils.R b/R/data-utils.R index 5dcac99a..f880634a 100644 --- a/R/data-utils.R +++ b/R/data-utils.R @@ -129,6 +129,29 @@ id_orig_helper.miiv_env <- function(x, id) { as_id_tbl(res, id, by_ref = TRUE) } +#' @rdname data_utils +#' @export +id_orig_helper.sic_env <- function(x, id) { + + if (!identical(id, "patientid")) { + return(NextMethod()) + } + + cfg <- as_id_cfg(x)[id == id_var_opts(x)] + + assert_that(length(cfg) == 1L) + + sta <- field(cfg, "start") + age <- "admissionyear" + + res <- as_src_tbl(x, field(cfg, "table")) + res <- res[, c(id, sta, age)] + res <- res[, c(sta, age) := shift_year(get(sta), get(age))] + + as_id_tbl(res, id, by_ref = TRUE) +} + + #' @export id_orig_helper.default <- function(x, ...) stop_generic(x, .Generic) @@ -332,6 +355,35 @@ id_win_helper.miiv_env <- function(x) { order_rename(res, ids, sta, end) } +#' @rdname data_utils +#' @export +id_win_helper.sic_env <- function(x) { + cfg <- sort(as_id_cfg(x), decreasing = TRUE) + + ids <- field(cfg, "id") + sta <- field(cfg, "start") + end <- field(cfg, "end") + + tbl <- as_src_tbl(x, unique(field(cfg, "table"))) + + mis <- setdiff(sta, colnames(tbl)) + + res <- load_src(tbl, cols = c(ids, intersect(sta, colnames(tbl)), end)) + + assert_that(length(mis) == 1L) + res[, firstadmission := 0L] + + res <- res[, c(sta, end) := lapply(.SD, s_as_mins), .SDcols = c(sta, end)] + res[, timeofstay := offsetafterfirstadmission + timeofstay] + + res <- setcolorder(res, c(ids, sta, end)) + res <- rename_cols(res, c(ids, paste0(ids, "_start"), + paste0(ids, "_end")), by_ref = TRUE) + + as_id_tbl(res, ids[2L], by_ref = TRUE) +} + + #' @export id_win_helper.default <- function(x) stop_generic(x, .Generic) diff --git a/R/utils-misc.R b/R/utils-misc.R index d3a1de5f..8c14e4d3 100644 --- a/R/utils-misc.R +++ b/R/utils-misc.R @@ -245,6 +245,8 @@ cat_line <- function(...) { ms_as_mins <- function(x) min_as_mins(as.integer(x / 6e4)) +s_as_mins <- function(x) min_as_mins(as.integer(x / 60)) + min_as_mins <- function(x) as.difftime(x, units = "mins") digest_lst <- function(x) as.character(openssl::md5(serialize(x, NULL))) From 2b39787b60fa2269c07db47a2f9d84c5309f14ff Mon Sep 17 00:00:00 2001 From: prockenschaub Date: Thu, 11 May 2023 07:22:19 +0200 Subject: [PATCH 04/38] add callback hooks to postprocess tbls on import --- R/setup-import.R | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/R/setup-import.R b/R/setup-import.R index 95640867..28e9c3af 100644 --- a/R/setup-import.R +++ b/R/setup-import.R @@ -257,7 +257,8 @@ partition_table <- function(x, dir, progress = NULL, chunk_length = 10 ^ 7, rawf <- raw_file_name(x) file <- file.path(dir, rawf) name <- tbl_name(x) - + callback <- tbl_callback(x) + exp_row <- n_row(x) if (is.na(exp_row)) { @@ -268,17 +269,18 @@ partition_table <- function(x, dir, progress = NULL, chunk_length = 10 ^ 7, if (length(file) == 1L) { - callback <- function(x, pos, ...) { - report_problems(x, rawf) - split_write(x, pfun, tempdir, ((pos - 1L) / chunk_length) + 1L, - progress, name, tick) + process_chunk <- function(x, pos, ...) { + report_problems(x, rawf) + split_write(callback(x), pfun, tempdir, ((pos - 1L) / chunk_length) + 1L, + progress, name, tick) } + if (grepl("\\.gz$", file)) { file <- gunzip(file, tempdir) } - readr::read_csv_chunked(file, callback, chunk_length, col_types = spec, + readr::read_csv_chunked(file, process_chunk, chunk_length, col_types = spec, progress = FALSE, ...) if (is.na(exp_row)) { @@ -291,8 +293,7 @@ partition_table <- function(x, dir, progress = NULL, chunk_length = 10 ^ 7, dat <- readr::read_csv(file[i], col_types = spec, progress = FALSE, ...) report_problems(dat, rawf[i]) - - split_write(dat, pfun, tempdir, i, progress, name, tick) + split_write(callback(dat), pfun, tempdir, i, progress, name, tick) } } @@ -356,6 +357,7 @@ csv_to_fst <- function(x, dir, progress = NULL, ...) { raw <- raw_file_name(x) src <- file.path(dir, raw) dst <- file.path(dir, fst_file_name(x)) + callback <- tbl_callback(x) assert_that(length(x) == 1L, length(src) == 1L, length(dst) == 1L) @@ -365,6 +367,7 @@ csv_to_fst <- function(x, dir, progress = NULL, ...) { report_problems(dat, raw) + dat <- callback(dat) dat <- rename_cols(setDT(dat), ricu_cols(x), orig_cols(x)) fst::write_fst(dat, dst, compress = 100L) From d459959c85853dec2c703569f27a80409b2e2f54 Mon Sep 17 00:00:00 2001 From: prockenschaub Date: Thu, 11 May 2023 07:24:28 +0200 Subject: [PATCH 05/38] add callback to deserialise sicdb data_float_h --- R/callback-tbl.R | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 R/callback-tbl.R diff --git a/R/callback-tbl.R b/R/callback-tbl.R new file mode 100644 index 00000000..6777e073 --- /dev/null +++ b/R/callback-tbl.R @@ -0,0 +1,24 @@ + +sic_data_float_h <- function(dat, ...) { + hexstring_to_float <- function(x) { + if (is.na(x)) { + return(NA_real_) + } + hexstring <- substring(x, seq(1, 482, 2), seq(2, 482, 2)) + bytes <- as.raw(strtoi(hexstring[-1], base = 16)) + floats <- readBin(bytes, numeric(), length(bytes) %/% 4, 4, endian = "little") + ifelse(floats == 0, NA_real_, floats) + } + + setDT(dat) + dat[, c("rawdata") := lapply(get("rawdata"), hexstring_to_float)] # TODO: remove hard coding of rawdata and derive from JSON config + dat <- dat[, .( + Offset = Offset + 60 * (0:(sapply(rawdata, length)-1)), + Val = Val, + cnt = cnt, + rawdata = unlist(rawdata) + ), + by = .(id, CaseID, DataID) + ] + dat +} \ No newline at end of file From 32b8f60a5a27aeed5bdd3858f3cba2328d5d2d6b Mon Sep 17 00:00:00 2001 From: prockenschaub Date: Thu, 11 May 2023 07:28:35 +0200 Subject: [PATCH 06/38] add missing tbl_callback function --- R/config-utils.R | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/R/config-utils.R b/R/config-utils.R index faf09d75..055b12ad 100644 --- a/R/config-utils.R +++ b/R/config-utils.R @@ -387,6 +387,17 @@ partition_col <- function(x, orig_names = FALSE) { col } +tbl_callback <- function(x){ + x <- as_tbl_cfg(x) + assert_that(length(x) == 1L) + + if ("callback" %in% vctrs::fields(x)) { + str_to_fun(vctrs::field(x, "callback")) + } else { + identity_callback + } +} + #' @export n_tick.tbl_cfg <- function(x) { From 6afe44c91c5a8c8916a6f26e8ac0d54e7a8f2a10 Mon Sep 17 00:00:00 2001 From: prockenschaub Date: Thu, 11 May 2023 07:28:40 +0200 Subject: [PATCH 07/38] add sic_itm inspired by hrd_itm --- R/concept-utils.R | 52 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/R/concept-utils.R b/R/concept-utils.R index 114bd1ea..6fb0c339 100644 --- a/R/concept-utils.R +++ b/R/concept-utils.R @@ -200,6 +200,30 @@ get_hirid_ids <- function(x, ids) { load_id("variables", x, .data$id %in% .env$ids, cols = "unit", id_var = "id") } +#' @rdname data_items +#' @export +init_itm.sic_itm <- function(x, table, sub_var, ids, + callback = "identity_callback", ...) { + + assert_that(is.string(table), has_length(ids), + is.character(ids) || is_intish(ids)) + + x[["table"]] <- table + + units <- get_sic_ids(x, ids) + units <- rename_cols(rm_na(units), sub_var, "referenceglobalid") + + todo <- c("ids", "units") + x[todo] <- mget(todo) + + complete_tbl_itm(x, callback, sub_var, ...) +} + +get_sic_ids <- function(x, ids) { + load_id("d_references", x, .data$referenceglobalid %in% .env$ids, cols = "referenceunit", id_var = "referenceglobalid") +} + + #' @param unit_val String valued unit to be used in case no `unit_var` is #' available for the given table #' @@ -331,6 +355,10 @@ prepare_query.sel_itm <- prep_sel #' @export prepare_query.hrd_itm <- prep_sel +#' @keywords internal +#' @export +prepare_query.sic_itm <- prep_sel + #' @keywords internal #' @export prepare_query.rgx_itm <- function(x) { @@ -547,6 +575,17 @@ do_callback.hrd_itm <- function(x, ...) { NextMethod() } +#' @keywords internal +#' @export +do_callback.sic_itm <- function(x, ...) { + # TODO: generalise and combine with do_callback.hrd_itm + if (is.null(get_itm_var(x, "unit_var"))) { + x <- try_add_vars(x, unit_var = "referenceunit") + } + + NextMethod() +} + #' @keywords internal #' @export do_callback.col_itm <- function(x, ...) { @@ -605,6 +644,19 @@ do_itm_load.hrd_itm <- function(x, id_type = "icustay", interval = hours(1L)) { res } +#' @export +do_itm_load.sic_itm <- function(x, id_type = "icustay", interval = hours(1L)) { + + res <- NextMethod() + + if (is.null(get_itm_var(x, "unit_var"))) { + unt <- x[["units"]] + res <- merge(res, unt, by = get_itm_var(x, "sub_var"), all.x = TRUE) + } + + res +} + #' @export do_itm_load.col_itm <- function(x, id_type = "icustay", interval = hours(1L)) { From bc41f88e20bd2f4a376946421157dc39e0e19a4b Mon Sep 17 00:00:00 2001 From: prockenschaub Date: Thu, 11 May 2023 07:30:22 +0200 Subject: [PATCH 08/38] adjust data_float_h config to recent changes --- inst/extdata/config/data-sources.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/inst/extdata/config/data-sources.json b/inst/extdata/config/data-sources.json index 12e66608..d7ef2cda 100644 --- a/inst/extdata/config/data-sources.json +++ b/inst/extdata/config/data-sources.json @@ -9568,6 +9568,7 @@ "files": "data_float_h.csv.gz", "defaults": { "index_var": "offset", + "val_var": "rawdata", "time_vars": ["offset"] }, "cols": { @@ -9600,7 +9601,8 @@ "col": "dataid", "breaks": [702, 703, 705, 708, 709, 710, 715, 717, 719, 724, 725, 731, 773, 2018, 2274, 2278, 2280, 2283, 2290, 3056, 3059, 3071] - } + }, + "callback": "sic_data_float_h" }, "data_ref": { "files": "data_ref.csv.gz", From b53c592f50ae56f4dbd160cd2ea05dbb4e1293f3 Mon Sep 17 00:00:00 2001 From: prockenschaub Date: Thu, 11 May 2023 07:33:27 +0200 Subject: [PATCH 09/38] add hr and crea as examples for sicdb --- inst/extdata/config/concept-dict.json | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/inst/extdata/config/concept-dict.json b/inst/extdata/config/concept-dict.json index 10fd7f3c..91c7f4a0 100644 --- a/inst/extdata/config/concept-dict.json +++ b/inst/extdata/config/concept-dict.json @@ -1375,6 +1375,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 367, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -3060,6 +3068,14 @@ "table": "chartevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 708, + "table": "data_float_h", + "sub_var": "dataid", + "class": "sic_itm" + } ] } }, From 3ca5b04201e38a3561fb7557cb1fa56bcf2a50fa Mon Sep 17 00:00:00 2001 From: prockenschaub Date: Wed, 26 Jul 2023 09:30:16 +0200 Subject: [PATCH 10/38] add sex and death concepts for sic --- R/callback-itm.R | 17 +++++++++++++++++ inst/extdata/config/concept-dict.json | 18 ++++++++++++++++++ inst/extdata/config/data-sources.json | 2 +- 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/R/callback-itm.R b/R/callback-itm.R index 5f2521be..7f4d48ba 100644 --- a/R/callback-itm.R +++ b/R/callback-itm.R @@ -195,6 +195,16 @@ mimic_age <- function(x) { eicu_age <- function(x) as.numeric(ifelse(x == "> 89", 90, x)) +sic_sex <- function(x) { + ifelse( + x == 735, + "Male", + ifelse(x == 736, + "Female", + NA_character_ + )) +} + hirid_death <- function(x, val_var, sub_var, env, ...) { dis <- "discharge_status" @@ -747,6 +757,13 @@ aumc_death <- function(x, val_var, ...) { x } +sic_death <- function(x, val_var, adm_time, ...) { + idx <- index_var(x) + + x <- x[, c(val_var) := is_true(get(idx) - (get(adm_time) + secs(get(val_var))) < hours(72L))] + x +} + aumc_bxs <- function(x, val_var, dir_var, ...) { x <- x[get(dir_var) == "-", c(val_var) := -1L * get(val_var)] x diff --git a/inst/extdata/config/concept-dict.json b/inst/extdata/config/concept-dict.json index 91c7f4a0..a98d6bd2 100644 --- a/inst/extdata/config/concept-dict.json +++ b/inst/extdata/config/concept-dict.json @@ -1578,6 +1578,16 @@ "callback": "transform_fun(comp_na(`==`, 1L))", "class": "col_itm" } + ], + "sic": [ + { + "table": "cases", + "index_var": "offsetofdeath", + "adm_time": "offsetafterfirstadmission", + "val_var": "timeofstay", + "callback": "sic_death", + "class": "col_itm" + } ] } }, @@ -5334,6 +5344,14 @@ "callback": "apply_map(c(M = 'Male', F = 'Female'))", "class": "col_itm" } + ], + "sic": [ + { + "table": "cases", + "val_var": "sex", + "class": "col_itm", + "callback": "transform_fun(sic_sex)" + } ] } }, diff --git a/inst/extdata/config/data-sources.json b/inst/extdata/config/data-sources.json index d7ef2cda..d74cda96 100644 --- a/inst/extdata/config/data-sources.json +++ b/inst/extdata/config/data-sources.json @@ -9410,7 +9410,7 @@ "files": "cases.csv.gz", "defaults": { "index_var": "offsetafterfirstadmission", - "time_vars": ["offsetafterfirstadmission"] + "time_vars": ["offsetafterfirstadmission", "offsetofdeath"] }, "cols": { "caseid": { From 9396da564069a9a1834edcbc6383e573965c1bb6 Mon Sep 17 00:00:00 2001 From: prockenschaub Date: Wed, 26 Jul 2023 09:36:44 +0200 Subject: [PATCH 11/38] add vitals, labs, height, and weight concepts for sic --- inst/extdata/config/concept-dict.json | 457 +++++++++++++++++++++++++- 1 file changed, 456 insertions(+), 1 deletion(-) diff --git a/inst/extdata/config/concept-dict.json b/inst/extdata/config/concept-dict.json index a98d6bd2..0ce2e180 100644 --- a/inst/extdata/config/concept-dict.json +++ b/inst/extdata/config/concept-dict.json @@ -369,6 +369,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 171, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -428,6 +436,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 609, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -487,6 +503,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 617, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -546,6 +570,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 616, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -612,6 +644,21 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 174, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + }, + { + "ids": 295, + "table": "laboratory", + "sub_var": "laboratoryid", + "callback": "blood_cell_ratio", + "class": "sic_itm" + } ] } }, @@ -674,6 +721,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 449, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -734,6 +789,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 456, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -796,6 +859,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 333, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -858,6 +929,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 332, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -1051,6 +1130,15 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 457, + "table": "laboratory", + "sub_var": "laboratoryid", + "callback": "convert_unit(binary_op(`*`, 4), 'mg/dL', 'mmol/l')", + "class": "sic_itm" + } ] } }, @@ -1111,6 +1199,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 452, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -1170,6 +1266,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 611, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -1229,6 +1333,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 253, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -1289,6 +1401,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 450, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -1447,6 +1567,14 @@ "sub_var": "itemid", "callback": "convert_unit(binary_op(`*`, 10), 'mg/L', 'mg/dl')" } + ], + "sic": [ + { + "ids": 341, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -1507,6 +1635,14 @@ "table": "chartevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": [702, 705], + "table": "data_float_h", + "sub_var": "dataid", + "class": "sic_itm" + } ] } }, @@ -2191,6 +2327,21 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 197, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + }, + { + "ids": 299, + "table": "laboratory", + "sub_var": "laboratoryid", + "callback": "blood_cell_ratio", + "class": "sic_itm" + } ] } }, @@ -2471,6 +2622,14 @@ "table": "chartevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 716, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -2596,6 +2755,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 344, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -2685,6 +2852,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 684, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -2766,6 +2941,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": [348, 656], + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -2802,6 +2985,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 214, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -2838,6 +3029,14 @@ "sub_var": "variableid", "class": "hrd_itm" } + ], + "sic": [ + { + "ids": [196, 660], + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -2891,6 +3090,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": [217, 682], + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -2956,6 +3163,13 @@ "sub_var": "itemid", "callback": "convert_unit(binary_op(`*`, 2.54), 'cm', '^in')" } + ], + "sic": [ + { + "table": "cases", + "val_var": "heightonadmission", + "class": "col_itm" + } ] } }, @@ -3018,6 +3232,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": [289, 658], + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -3081,7 +3303,7 @@ ], "sic": [ { - "ids": 708, + "ids": [708, 724], "table": "data_float_h", "sub_var": "dataid", "class": "sic_itm" @@ -3283,6 +3505,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": [463, 685], + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -3343,6 +3573,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": [465, 657], + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -3513,6 +3751,21 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 223, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + }, + { + "ids": 302, + "table": "laboratory", + "sub_var": "laboratoryid", + "callback": "blood_cell_ratio", + "class": "sic_itm" + } ] } }, @@ -3583,6 +3836,14 @@ "table": "chartevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": [702, 705], + "table": "data_float_h", + "sub_var": "dataid", + "class": "sic_itm" + } ] } }, @@ -3643,6 +3904,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 566, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -3705,6 +3974,15 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 290, + "table": "laboratory", + "sub_var": "laboratoryid", + "callback": "convert_unit(binary_op(`*`, 0.16114), '%')", + "class": "sic_itm" + } ] } }, @@ -3765,6 +4043,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 286, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -3864,6 +4150,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 661, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -3936,6 +4230,15 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 468, + "table": "laboratory", + "sub_var": "laboratoryid", + "callback": "convert_unit(binary_op(`*`, 2.431), 'mg/dL')", + "class": "sic_itm" + } ] } }, @@ -4062,6 +4365,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 469, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -4128,6 +4439,21 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 230, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + }, + { + "ids": 308, + "table": "laboratory", + "sub_var": "laboratoryid", + "callback": "blood_cell_ratio", + "class": "sic_itm" + } ] } }, @@ -4408,6 +4734,14 @@ "table": "chartevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 710, + "table": "data_float_h", + "sub_var": "dataid", + "class": "sic_itm" + } ] } }, @@ -4477,6 +4811,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 687, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -4536,6 +4878,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 688, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -4681,6 +5031,15 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 471, + "table": "laboratory", + "sub_var": "laboratoryid", + "callback": "convert_unit(binary_op(`*`, 3.097521), 'mg/dL')", + "class": "sic_itm" + } ] } }, @@ -4741,6 +5100,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 314, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -4801,6 +5168,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 689, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -4852,6 +5227,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 598, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -4911,6 +5294,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 597, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -5032,6 +5423,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 599, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -5144,6 +5543,14 @@ "table": "chartevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 719, + "table": "data_float_h", + "sub_var": "dataid", + "class": "sic_itm" + } ] } }, @@ -5271,6 +5678,14 @@ "table": "chartevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": [701, 704], + "table": "data_float_h", + "sub_var": "dataid", + "class": "sic_itm" + } ] } }, @@ -5569,6 +5984,14 @@ "sub_var": "itemid", "callback": "convert_unit(fahr_to_cels, 'C', 'f')" } + ], + "sic": [ + { + "ids": 709, + "table": "data_float_h", + "sub_var": "dataid", + "class": "sic_itm" + } ] } }, @@ -5666,6 +6089,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 481, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -5784,6 +6215,14 @@ "table": "outputevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 725, + "table": "data_float_h", + "sub_var": "dataid", + "class": "sic_itm" + } ] } }, @@ -6075,6 +6514,14 @@ "table": "labevents", "sub_var": "itemid" } + ], + "sic": [ + { + "ids": 301, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ] } }, @@ -6137,6 +6584,14 @@ "table": "chartevents", "sub_var": "itemid" } + ], + "sic": [ + { + "table": "cases", + "val_var": "weightonadmission", + "class": "col_itm", + "callback": "transform_fun(binary_op(`/`, 1000))" + } ] } } From e84ea7d593d79b0b10df75159ae454e54baed729 Mon Sep 17 00:00:00 2001 From: prockenschaub Date: Wed, 26 Jul 2023 09:39:28 +0200 Subject: [PATCH 12/38] add age and los_icu concepts --- inst/extdata/config/concept-dict.json | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/inst/extdata/config/concept-dict.json b/inst/extdata/config/concept-dict.json index 0ce2e180..38c226e3 100644 --- a/inst/extdata/config/concept-dict.json +++ b/inst/extdata/config/concept-dict.json @@ -307,6 +307,13 @@ "callback": "transform_fun(mimic_age)", "class": "col_itm" } + ], + "sic": [ + { + "table": "cases", + "val_var": "ageonadmission", + "class": "col_itm" + } ] } }, @@ -3684,6 +3691,14 @@ "win_type": "icustay", "class": "fun_itm" } + ], + "sic": [ + { + "table": "cases", + "val_var": "timeofstay", + "callback": "transform_fun(binary_op(`/`, 60 * 60 * 24))", + "class": "col_itm" + } ] } }, From b9350a1a98801968f0ab964ac63c5a38a3917a96 Mon Sep 17 00:00:00 2001 From: prockenschaub Date: Wed, 26 Jul 2023 09:40:15 +0200 Subject: [PATCH 13/38] add most medication concepts for sic --- R/callback-itm.R | 24 +++++++ inst/extdata/config/concept-dict.json | 99 +++++++++++++++++++++++++++ inst/extdata/config/data-sources.json | 1 + 3 files changed, 124 insertions(+) diff --git a/R/callback-itm.R b/R/callback-itm.R index 7f4d48ba..deffa031 100644 --- a/R/callback-itm.R +++ b/R/callback-itm.R @@ -620,6 +620,21 @@ aumc_rate_units <- function(mcg_to_units) { } } +sic_rate_kg <- function(x, val_var, unit_var, stop_var, env, ...) { + + g_to_mcg <- convert_unit(binary_op(`*`, 1000000), "mcg", "g") + + res <- g_to_mcg(x, val_var, unit_var) + res <- add_weight(res, env, "weight") + + res <- res[, c(val_var) := get(val_var) / get("weight")] + res <- res[, c(unit_var) := paste(get(unit_var), 'min', sep = "/kg/")] + + expand(res, index_var(x), stop_var, + keep_vars = c(id_vars(x), val_var, unit_var)) +} + + eicu_duration <- function(gap_length) { assert_that(is_interval(gap_length), is_scalar(gap_length)) @@ -641,6 +656,15 @@ aumc_dur <- function(x, val_var, stop_var, grp_var, ...) { calc_dur(x, val_var, index_var(x), stop_var, grp_var) } +default_duration <- function(x, val_var, stop_var, grp_var, ...) { + calc_dur(x, val_var, index_var(x), stop_var, grp_var) +} + +no_duration <- function(x, val_var, grp_var, ...) { + calc_dur(x, val_var, index_var(x), index_var(x), grp_var) +} + + #' Used for determining vasopressor durations, `calc_dur()` will calculate #' durations by taking either per ID or per combination of ID and `grp_var` #' the minimum for `min_var` and the maximum of `max_var` and returning the diff --git a/inst/extdata/config/concept-dict.json b/inst/extdata/config/concept-dict.json index 38c226e3..5de3f492 100644 --- a/inst/extdata/config/concept-dict.json +++ b/inst/extdata/config/concept-dict.json @@ -99,6 +99,13 @@ "sub_var": "itemid", "callback": "transform_fun(set_val(TRUE))" } + ], + "sic": [ + { + "ids": [1401, 1406, 1408, 1410, 1418, 1421, 1422, 1423, 1428, 1431, 1433, 1436, 1439, 1446, 1449, 1451, 1454, 1455, 1456, 1457, 1458, 1459, 1460, 1461, 1462, 1577, 1603, 1628, 1605, 1997, 1693, 1606, 1813, 1913, 1927, 1819], + "table": "medication", + "sub_var": "drugid" + } ] } }, @@ -1440,6 +1447,14 @@ "sub_var": "pharmaid", "callback": "transform_fun(set_val(TRUE))" } + ], + "sic": [ + { + "ids": [1397, 1506, 1524, 1525, 1751, 1977], + "table": "medication", + "sub_var": "drugid", + "callback": "transform_fun(set_val(TRUE))" + } ] } }, @@ -1942,6 +1957,16 @@ "grp_var": "linkorderid", "callback": "mimic_dur_inmv" } + ], + "sic": [ + { + "ids": 1559, + "table": "medication", + "sub_var": "drugid", + "stop_var": "offsetdrugend", + "grp_var": "id", + "callback": "default_duration" + } ] } }, @@ -2033,6 +2058,17 @@ "stop_var": "endtime", "callback": "mimic_rate_mv" } + ], + "sic": [ + { + "ids": 1559, + "table": "medication", + "sub_var": "drugid", + "val_var": "amountperminute", + "stop_var": "offsetdrugend", + "class": "sic_itm", + "callback": "sic_rate_kg" + } ] } }, @@ -2120,6 +2156,16 @@ "grp_var": "linkorderid", "callback": "mimic_dur_inmv" } + ], + "sic": [ + { + "ids": 1618, + "table": "medication", + "sub_var": "drugid", + "stop_var": "offsetdrugend", + "grp_var": "id", + "callback": "default_duration" + } ] } }, @@ -2202,6 +2248,17 @@ "stop_var": "endtime", "callback": "mimic_rate_mv" } + ], + "sic": [ + { + "ids": 1618, + "table": "medication", + "sub_var": "drugid", + "val_var": "amountperminute", + "stop_var": "offsetdrugend", + "class": "sic_itm", + "callback": "sic_rate_kg" + } ] } }, @@ -2437,6 +2494,16 @@ "grp_var": "linkorderid", "callback": "mimic_dur_inmv" } + ], + "sic": [ + { + "ids": 1502, + "table": "medication", + "sub_var": "drugid", + "stop_var": "offsetdrugend", + "grp_var": "id", + "callback": "default_duration" + } ] } }, @@ -2528,6 +2595,17 @@ "stop_var": "endtime", "callback": "mimic_rate_mv" } + ], + "sic": [ + { + "ids": 1502, + "table": "medication", + "sub_var": "drugid", + "val_var": "amountperminute", + "stop_var": "offsetdrugend", + "class": "sic_itm", + "callback": "sic_rate_kg" + } ] } }, @@ -4565,6 +4643,16 @@ "grp_var": "linkorderid", "callback": "mimic_dur_inmv" } + ], + "sic": [ + { + "ids": 1562, + "table": "medication", + "sub_var": "drugid", + "stop_var": "offsetdrugend", + "grp_var": "id", + "callback": "default_duration" + } ] } }, @@ -4663,6 +4751,17 @@ "stop_var": "endtime", "callback": "mimic_rate_mv" } + ], + "sic": [ + { + "ids": 1562, + "table": "medication", + "sub_var": "drugid", + "val_var": "amountperminute", + "stop_var": "offsetdrugend", + "class": "sic_itm", + "callback": "sic_rate_kg" + } ] } }, diff --git a/inst/extdata/config/data-sources.json b/inst/extdata/config/data-sources.json index d74cda96..bc1461ba 100644 --- a/inst/extdata/config/data-sources.json +++ b/inst/extdata/config/data-sources.json @@ -9667,6 +9667,7 @@ "files": "medication.csv.gz", "defaults": { "index_var": "offset", + "val_var": "amount", "time_vars": ["offset", "offsetdrugend"] }, "cols": { From 5419ab4c6a6f38fc2fc15e2e44744cacbb9c9f9c Mon Sep 17 00:00:00 2001 From: prockenschaub Date: Wed, 26 Jul 2023 15:28:43 +0200 Subject: [PATCH 14/38] fix preproc for data_float_h some values are only taken once during the hour and thus have a cnt=1 and rawdata=NA. The actual data is stored in Val, which otherwise holds the average. Since after expansion, rawdata is the main data field, the value from Val needs to be moved to rawdata in this case. --- R/callback-tbl.R | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/R/callback-tbl.R b/R/callback-tbl.R index 6777e073..d8e0be63 100644 --- a/R/callback-tbl.R +++ b/R/callback-tbl.R @@ -13,12 +13,17 @@ sic_data_float_h <- function(dat, ...) { setDT(dat) dat[, c("rawdata") := lapply(get("rawdata"), hexstring_to_float)] # TODO: remove hard coding of rawdata and derive from JSON config dat <- dat[, .( - Offset = Offset + 60 * (0:(sapply(rawdata, length)-1)), - Val = Val, - cnt = cnt, - rawdata = unlist(rawdata) - ), - by = .(id, CaseID, DataID) + Offset = Offset + 60 * (0:(sapply(rawdata, length)-1)), + Val = Val, + cnt = cnt, + rawdata = unlist(rawdata), + rawdata_present = !is.na(rawdata) + ), + by = .(id, CaseID, DataID) ] + dat[rawdata_present == FALSE, rawdata := Val] # Fix measurements that only have one + dat[, rawdata_present := NULL] dat -} \ No newline at end of file +} + + From 078149e5b5211e6d18db8ab6eede26965dca9ccd Mon Sep 17 00:00:00 2001 From: Drago Date: Fri, 17 Mar 2023 12:52:31 -0400 Subject: [PATCH 15/38] add OMR to miiv --- inst/extdata/config/data-sources.json | 31 +++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/inst/extdata/config/data-sources.json b/inst/extdata/config/data-sources.json index bc1461ba..a6f8dc67 100644 --- a/inst/extdata/config/data-sources.json +++ b/inst/extdata/config/data-sources.json @@ -7173,6 +7173,37 @@ } } }, + "omr" : { + "files": "core/omr.csv.gz", + "defaults": { + "timevars": ["chartdate"], + "val_var": "result_value" + }, + "num_rows": 6439169, + "cols" : { + "subject_id": { + "name": "subject_id", + "spec": "col_integer" + }, + "chartdate": { + "name": "chartdate", + "spec": "col_datetime", + "format": "%Y-%m-%d" + }, + "seq_num": { + "name": "seq_num", + "spec": "col_integer" + }, + "result_name": { + "name": "result_name", + "spec": "col_character" + }, + "result_value": { + "name": "result_value", + "spec": "col_character" + } + } + }, "transfers": { "files": "core/transfers.csv.gz", "defaults": { From e5be7e871dc90db9098dc1d56cd47805210d8ce8 Mon Sep 17 00:00:00 2001 From: Drago Date: Tue, 11 Apr 2023 17:23:45 -0400 Subject: [PATCH 16/38] add miiv omr --- inst/extdata/config/data-sources.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/extdata/config/data-sources.json b/inst/extdata/config/data-sources.json index a6f8dc67..ba91be50 100644 --- a/inst/extdata/config/data-sources.json +++ b/inst/extdata/config/data-sources.json @@ -7176,7 +7176,7 @@ "omr" : { "files": "core/omr.csv.gz", "defaults": { - "timevars": ["chartdate"], + "time_vars": ["chartdate"], "val_var": "result_value" }, "num_rows": 6439169, From c8d0c9bba110081bf9d782d7ad0a5a4902fbac0a Mon Sep 17 00:00:00 2001 From: Drago Date: Mon, 1 May 2023 12:04:45 -0400 Subject: [PATCH 17/38] load_concepts() concepts arg doc fix --- R/concept-load.R | 7 ++-- man/change_id.Rd | 58 ++++++++++---------------- man/data_env.Rd | 99 +++++++++++++------------------------------- man/load_concepts.Rd | 5 ++- 4 files changed, 57 insertions(+), 112 deletions(-) diff --git a/R/concept-load.R b/R/concept-load.R index bca69bb3..174a089c 100644 --- a/R/concept-load.R +++ b/R/concept-load.R @@ -166,8 +166,9 @@ load_concepts <- function(x, ...) UseMethod("load_concepts", x) #' @param src A character vector, used to subset the `concepts`; `NULL` #' means no subsetting -#' @param concepts The concepts to be used or `NULL` in which case -#' [load_dictionary()] is called +#' @param concepts The concepts to be used, or `NULL`. In the latter case the +#' standard ricu dictionary (obtained by calling [load_dictionary()]) is used +#' for loading the objects specified in `x`. #' @param dict_name,dict_dirs In case not concepts are passed as `concepts`, #' these are forwarded to [load_dictionary()] as `name` and `file` arguments #' @@ -179,8 +180,6 @@ load_concepts.character <- function(x, src = NULL, concepts = NULL, ..., if (is.null(concepts)) { - assert_that(not_null(src)) - load_concepts( load_dictionary(src, x, name = dict_name, cfg_dirs = dict_dirs), src = NULL, ... diff --git a/man/change_id.Rd b/man/change_id.Rd index 8c3fa01e..6193adf5 100644 --- a/man/change_id.Rd +++ b/man/change_id.Rd @@ -57,34 +57,19 @@ and \code{downgrade_id()} when the target ID system is of lower cardinality } \details{ In order to provide ID system conversion for a data source, the (internal) -function \code{\link[=id_map]{id_map()}} must be able to construct an ID mapping for that data +function [id_map()] must be able to construct an ID mapping for that data source. Constructing such a mapping can be expensive w.r.t. the frequency -it might be re-used and therefore, \code{\link[=id_map]{id_map()}} provides caching +it might be re-used and therefore, [id_map()] provides caching infrastructure. The mapping itself is constructed by the (internal) -function \code{\link[=id_map_helper]{id_map_helper()}}, which is expected to provide source and +function [id_map_helper()], which is expected to provide source and destination ID columns as well as start and end columns corresponding to the destination ID, relative to the source ID system. In the following -example, we request for \code{mimic_demo}, with ICU stay IDs as source and +example, we request for `mimic_demo`, with ICU stay IDs as source and hospital admissions as destination IDs. -\if{html}{\out{
}}\preformatted{id_map_helper(mimic_demo, "icustay_id", "hadm_id") -#> # An `id_tbl`: 136 x 4 -#> # Id var: `icustay_id` -#> icustay_id hadm_id hadm_id_start hadm_id_end -#> -#> 1 201006 198503 -3290 mins 9114 mins -#> 2 201204 114648 -2 mins 6949 mins -#> 3 203766 126949 -1336 mins 8818 mins -#> 4 204132 157609 -1 mins 10103 mins -#> 5 204201 177678 -368 mins 9445 mins -#> ... -#> 132 295043 170883 -10413 mins 31258 mins -#> 133 295741 176805 -1 mins 3153 mins -#> 134 296804 110244 -1294 mins 4599 mins -#> 135 297782 167612 -1 mins 207 mins -#> 136 298685 151323 -1 mins 19082 mins -#> # i 131 more rows -}\if{html}{\out{
}} +```{r, eval = is_data_avail("mimic_demo")} +id_map_helper(mimic_demo, "icustay_id", "hadm_id") +``` Both start and end columns encode the hospital admission windows relative to each corresponding ICU stay start time. It therefore comes as no @@ -93,21 +78,22 @@ occurs before ICU stay start time), while end times are often days in the future (as hospital discharge typically occurs several days after ICU admission). -In order to use the ID conversion infrastructure offered by \code{ricu} for a -new dataset, it typically suffices to provide an \code{id_cfg} entry in the -source configuration (see \code{\link[=load_src_cfg]{load_src_cfg()}}), outlining the available ID +In order to use the ID conversion infrastructure offered by `ricu` for a +new dataset, it typically suffices to provide an `id_cfg` entry in the +source configuration (see [load_src_cfg()]), outlining the available ID systems alongside an ordering, as well as potentially a class specific -implementation of \code{\link[=id_map_helper]{id_map_helper()}} for the given source class, specifying +implementation of [id_map_helper()] for the given source class, specifying the corresponding time windows in 1 minute resolution (for every possible pair of IDs). -While both up- and downgrades for \code{id_tbl} objects, as well as downgrades -for \code{ts_tbl} objects are simple merge operations based on the ID mapping -provided by \code{\link[=id_map]{id_map()}}, ID upgrades for \code{ts_tbl} objects are slightly more -involved. As an example, consider the following setting: we have \code{data} -associated with \code{hadm_id} IDs and times relative to hospital admission: +While both up- and downgrades for `id_tbl` objects, as well as downgrades +for `ts_tbl` objects are simple merge operations based on the ID mapping +provided by [id_map()], ID upgrades for `ts_tbl` objects are slightly more +involved. As an example, consider the following setting: we have `data` +associated with `hadm_id` IDs and times relative to hospital admission: -\if{html}{\out{
}}\preformatted{ 1 2 3 4 5 6 7 8 +``` + 1 2 3 4 5 6 7 8 data ---*------*-------*--------*-------*-------*--------*------*--- 3h 10h 18h 27h 35h 43h 52h 59h @@ -117,17 +103,17 @@ hadm_id |-------------------------------------------------------------| icustay_id |------------------| |---------------| 0h 19h 0h 16h ICU_1 ICU_2 -}\if{html}{\out{
}} +``` -The mapping of data points from \code{hadm_id} to \code{icustay_id} is created as +The mapping of data points from `hadm_id` to `icustay_id` is created as follows: ICU stay end times mark boundaries and all data that is recorded after the last ICU stay ended is assigned to the last ICU stay. Therefore -data points 1-3 are assigned to \code{ICU_1}, while 4-8 are assigned to \code{ICU_2}. +data points 1-3 are assigned to `ICU_1`, while 4-8 are assigned to `ICU_2`. Times have to be shifted as well, as timestamps are expected to be relative to the current ID system. Data points 1-3 therefore are assigned to time stamps -4h, 3h and 11h, while data points 4-8 are assigned to -10h, -2h, 6h, 15h and 22h. Implementation-wise, the mapping is computed using an -efficient \code{data.table} rolling join. +efficient `data.table` rolling join. } \examples{ if (require(mimic.demo)) { diff --git a/man/data_env.Rd b/man/data_env.Rd index 0fd9d2a0..6249b595 100644 --- a/man/data_env.Rd +++ b/man/data_env.Rd @@ -42,98 +42,57 @@ hosted data source is available as well. As with the PhysioNet datasets, access is public but has to be granted by the data collectors. } \details{ -Setting up a dataset for use with \code{ricu} requires a configuration object. +Setting up a dataset for use with `ricu` requires a configuration object. For the included datasets, configuration can be loaded from -\if{html}{\out{
}}\preformatted{system.file("extdata", "config", "data-sources.json", package = "ricu") -}\if{html}{\out{
}} +``` +system.file("extdata", "config", "data-sources.json", package = "ricu") +``` -by calling \code{\link[=load_src_cfg]{load_src_cfg()}} and for dataset that are external to \code{ricu}, +by calling [load_src_cfg()] and for dataset that are external to `ricu`, additional configuration can be made available by setting the environment -variable \code{RICU_CONFIG_PATH} (for more information, refer to -\code{\link[=load_src_cfg]{load_src_cfg()}}). Using the dataset configuration object, data can be -downloaded (\code{\link[=download_src]{download_src()}}), imported (\code{\link[=import_src]{import_src()}}) and attached -(\code{\link[=attach_src]{attach_src()}}). While downloading and importing are one-time procedures, +variable `RICU_CONFIG_PATH` (for more information, refer to +[load_src_cfg()]). Using the dataset configuration object, data can be +downloaded ([download_src()]), imported ([import_src()]) and attached +([attach_src()]). While downloading and importing are one-time procedures, attaching of the dataset is repeated every time the package is loaded. Briefly, downloading loads the raw dataset from the internet (most likely -in \code{.csv} format), importing consists of some preprocessing to make the -data available more efficiently (by converting it to \code{\link[fst:fst]{.fst}} +in `.csv` format), importing consists of some preprocessing to make the +data available more efficiently (by converting it to [`.fst`][fst::fst()] format) and attaching sets up the data for use by the package. For more information on the individual steps, refer to the respective documentation pages. A dataset that has been successfully made available can interactively be explored by typing its name into the console and individual tables can be -inspected using the \code{$} function. For example for the MIMIC-III demo -dataset and the \code{icustays} table, this gives - -\if{html}{\out{
}}\preformatted{mimic_demo -#> -#> admissions callout caregivers chartevents -#> [129 x 19] [77 x 24] [7,567 x 4] [758,355 x 15] -#> cptevents d_cpt d_icd_diagnoses d_icd_procedures -#> [1,579 x 12] [134 x 9] [14,567 x 4] [3,882 x 4] -#> d_items d_labitems datetimeevents diagnoses_icd -#> [12,487 x 10] [753 x 6] [15,551 x 14] [1,761 x 5] -#> drgcodes icustays inputevents_cv inputevents_mv -#> [297 x 8] [136 x 12] [34,799 x 22] [13,224 x 31] -#> labevents microbiologyevents outputevents patients -#> [76,074 x 9] [2,003 x 16] [11,320 x 13] [100 x 8] -#> prescriptions procedureevents_mv procedures_icd services -#> [10,398 x 19] [753 x 25] [506 x 5] [163 x 6] -#> transfers -#> [524 x 13] +inspected using the `$` function. For example for the MIMIC-III demo +dataset and the `icustays` table, this gives + +```{r, eval = is_data_avail("mimic_demo")} +mimic_demo mimic_demo$icustays -#> # : [136 x 12] -#> # ID options: subject_id (patient) < hadm_id (hadm) < icustay_id (icustay) -#> # Defaults: `intime` (index), `last_careunit` (val) -#> # Time vars: `intime`, `outtime` -#> row_id subject_id hadm_id icustay_id dbsource first_careunit last_careunit -#> -#> 1 12742 10006 142345 206504 carevue MICU MICU -#> 2 12747 10011 105331 232110 carevue MICU MICU -#> 3 12749 10013 165520 264446 carevue MICU MICU -#> 4 12754 10017 199207 204881 carevue CCU CCU -#> 5 12755 10019 177759 228977 carevue MICU MICU -#> ... -#> 132 42676 44083 198330 286428 metavision CCU CCU -#> 133 42691 44154 174245 217724 metavision MICU MICU -#> 134 42709 44212 163189 239396 metavision MICU MICU -#> 135 42712 44222 192189 238186 metavision CCU CCU -#> 136 42714 44228 103379 217992 metavision SICU SICU -#> # i 131 more rows -#> # i 5 more variables: first_wardid , last_wardid , intime , -#> # outtime , los -}\if{html}{\out{
}} +``` Table subsets can be loaded into memory for example using the -\code{\link[base:subset]{base::subset()}} function, which uses non-standard evaluation (NSE) to +[base::subset()] function, which uses non-standard evaluation (NSE) to determine a row-subsetting. This design choice stems form the fact that some tables can have on the order of 10^8 rows, which makes loading full tables into memory an expensive operation. Table subsets loaded into -memory are represented as \code{\link[data.table:data.table]{data.table}} objects. +memory are represented as [`data.table`][data.table::data.table()] objects. Extending the above example, if only ICU stays corresponding to the patient -with \code{subject_id == 10124} are of interest, the respective data can be +with `subject_id == 10124` are of interest, the respective data can be loaded as -\if{html}{\out{
}}\preformatted{subset(mimic_demo$icustays, subject_id == 10124) -#> row_id subject_id hadm_id icustay_id dbsource first_careunit last_careunit -#> 1: 12863 10124 182664 261764 carevue MICU MICU -#> 2: 12864 10124 170883 222779 carevue MICU MICU -#> 3: 12865 10124 170883 295043 carevue CCU CCU -#> 4: 12866 10124 170883 237528 carevue MICU MICU -#> first_wardid last_wardid intime outtime los -#> 1: 23 23 2192-03-29 10:46:51 2192-04-01 06:36:00 2.8258 -#> 2: 50 50 2192-04-16 20:58:32 2192-04-20 08:51:28 3.4951 -#> 3: 7 7 2192-04-24 02:29:49 2192-04-26 23:59:45 2.8958 -#> 4: 23 23 2192-04-30 14:50:44 2192-05-15 23:34:21 15.3636 -}\if{html}{\out{
}} - -Much care has been taken to make \code{ricu} extensible to new datasets. For -example the publicly available ICU database \href{https://amsterdammedicaldatascience.nl/amsterdamumcdb/}{AmsterdamUMCdb } +```{r, eval = is_data_avail("mimic_demo")} +subset(mimic_demo$icustays, subject_id == 10124) +``` + +Much care has been taken to make `ricu` extensible to new datasets. For +example the publicly available ICU database [AmsterdamUMCdb +](https://amsterdammedicaldatascience.nl/amsterdamumcdb/) provided by the Amsterdam University Medical Center, currently is not part -of the core datasets of \code{ricu}, but code for integrating this dataset is -available on \href{https://github.com/eth-mds/aumc}{github}. +of the core datasets of `ricu`, but code for integrating this dataset is +available on [github](https://github.com/eth-mds/aumc). } \section{MIMIC-III}{ diff --git a/man/load_concepts.Rd b/man/load_concepts.Rd index 100ed04c..f2c687ff 100644 --- a/man/load_concepts.Rd +++ b/man/load_concepts.Rd @@ -92,8 +92,9 @@ load_concepts(x, ...) \item{src}{A character vector, used to subset the \code{concepts}; \code{NULL} means no subsetting} -\item{concepts}{The concepts to be used or \code{NULL} in which case -\code{\link[=load_dictionary]{load_dictionary()}} is called} +\item{concepts}{The concepts to be used, or \code{NULL}. In the latter case the +standard ricu dictionary (obtained by calling \code{\link[=load_dictionary]{load_dictionary()}}) is used +for loading the objects specified in \code{x}.} \item{dict_name, dict_dirs}{In case not concepts are passed as \code{concepts}, these are forwarded to \code{\link[=load_dictionary]{load_dictionary()}} as \code{name} and \code{file} arguments} From b1e2aed3b806e357fe8c4e3f056a2264834735ba Mon Sep 17 00:00:00 2001 From: Drago Date: Mon, 1 May 2023 12:13:37 -0400 Subject: [PATCH 18/38] load_concepts.integer() src NULL fix --- R/concept-load.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/R/concept-load.R b/R/concept-load.R index 174a089c..a420f1bf 100644 --- a/R/concept-load.R +++ b/R/concept-load.R @@ -201,8 +201,6 @@ load_concepts.integer <- function(x, src = NULL, concepts = NULL, ..., if (is.null(concepts)) { - assert_that(not_null(src)) - concepts <- load_dictionary(src, name = dict_name, cfg_dirs = dict_dirs) } else if (not_null(src)) { From 9c3481f2f7bb9d9790f34dc54c05ad0fc877bf1f Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Wed, 20 Mar 2024 20:07:05 +0100 Subject: [PATCH 19/38] Fix sic config --- inst/extdata/config/data-sources/sic.json | 99 ++++++++--------------- 1 file changed, 34 insertions(+), 65 deletions(-) diff --git a/inst/extdata/config/data-sources/sic.json b/inst/extdata/config/data-sources/sic.json index 760b50fc..351f1ab9 100644 --- a/inst/extdata/config/data-sources/sic.json +++ b/inst/extdata/config/data-sources/sic.json @@ -4,17 +4,17 @@ "url": "https://physionet.org/files/sicdb/1.0.6", "id_cfg": { "patient": { - "id": "PatientID", + "id": "patientid", "position": 1, - "start": "ICUOffset", - "end": "OffsetOfDeath", + "start": "icuoffset", + "end": "offsetofdeath", "table": "cases" }, "icustay": { - "id": "CaseID", + "id": "caseid", "position": 2, - "start": "ICUOffset", - "end": "TimeOfStay", + "start": "icuoffset", + "end": "timeofstay", "table": "cases" } }, @@ -22,13 +22,13 @@ "cases": { "files": "cases.csv.gz", "defaults": { - "index_var": "ICUOffset", + "index_var": "icuoffset", "time_vars": [ - "ICUOffset", - "OffsetOfDeath", - "HeartSurgeryBeginOffset", - "HeartSurgeryEndOffset", - "OffsetAfterFirstAdmission" + "icuoffset", + "offsetofdeath", + "heartsurgerybeginoffset", + "heartsurgeryendoffset", + "offsetafterfirstadmission" ] }, "num_rows": 27386, @@ -193,16 +193,12 @@ "data_float_h": { "files": "data_float_h.csv.gz", "defaults": { - "index_var": "Offset", - "val_var": "Val", - "time_vars": "Offset" + "index_var": "offset", + "val_var": "val", + "time_vars": "offset" }, "num_rows": 36785241, "cols": { - "id": { - "name": "id", - "spec": "col_integer" - }, "caseid": { "name": "CaseID", "spec": "col_integer" @@ -225,48 +221,21 @@ }, "rawdata": { "name": "rawdata", - "spec": "col_double" + "spec": "col_character" } }, "partitioning": { "col": "dataid", "breaks": [ - 1, - 2, - 3, - 4, - 7, - 28, - 29, - 702, - 703, - 705, - 708, - 709, - 710, - 715, - 717, - 719, - 724, - 725, - 731, - 773, - 2018, - 2274, - 2278, - 2280, - 2283, - 2290, - 3056, - 3059, - 3071 - ] - } + 702, 703, 705, 708, 709, 710, 715, 717, 719, 724, 725, + 731, 773, 2018, 2274, 2278, 2280, 2283, 2290, 3056, 3059, 3071] + }, + "callback": "sic_data_float_h" }, "data_ref": { "files": "data_ref.csv.gz", "defaults": { - "index_var": "OffsetAfterFirstAdmission" + "index_var": "offsetafterfirstadmission" }, "num_rows": 354157, "cols": { @@ -282,8 +251,8 @@ "name": "RefID", "spec": "col_integer" }, - "customfieldid": { - "name": "CustomFieldID", + "fieldid": { + "name": "FieldID", "spec": "col_integer" } } @@ -291,9 +260,9 @@ "laboratory": { "files": "laboratory.csv.gz", "defaults": { - "index_var": "Offset", - "val_var": "LaboratoryValue", - "time_vars": "Offset" + "index_var": "offset", + "val_var": "laboratoryvalue", + "time_vars": "offset" }, "num_rows": 17572279, "cols": { @@ -326,11 +295,11 @@ "medication": { "files": "medication.csv.gz", "defaults": { - "index_var": "Offset", + "index_var": "offset", "val_var": "Amount", "time_vars": [ - "Offset", - "OffsetDrugEnd" + "offset", + "offsetdrugend" ] }, "num_rows": 5141346, @@ -380,10 +349,10 @@ "data_range": { "files": "data_range.csv.gz", "defaults": { - "index_var": "Offset", + "index_var": "offset", "time_vars": [ - "Offset", - "OffsetEnd" + "offset", + "offsetend" ] }, "num_rows": 183339, @@ -417,8 +386,8 @@ "unitlog": { "files": "unitlog.csv.gz", "defaults": { - "index_var": "Offset", - "time_vars": "Offset" + "index_var": "offset", + "time_vars": "offset" }, "num_rows": 139968, "cols": { From d5d4c074c2063ea1d464997d1e3c22208572d258 Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Wed, 20 Mar 2024 20:07:27 +0100 Subject: [PATCH 20/38] Properly support full rawdata found in sic --- R/callback-tbl.R | 28 +++++++++++++++++++++++++ R/config-utils.R | 28 +++++++++++++++++++++++++ R/setup-import.R | 53 ++++++++++++++++++++++++++++-------------------- 3 files changed, 87 insertions(+), 22 deletions(-) create mode 100644 R/callback-tbl.R diff --git a/R/callback-tbl.R b/R/callback-tbl.R new file mode 100644 index 00000000..fc31a6c9 --- /dev/null +++ b/R/callback-tbl.R @@ -0,0 +1,28 @@ + +sic_data_float_h <- function(dat, ...) { + hexstring_to_float <- function(x) { + if (is.na(x)) { + return(NA_real_) + } + hexstring <- substring(x, seq(1, 482, 2), seq(2, 482, 2)) + bytes <- as.raw(strtoi(hexstring[-1], base = 16)) + floats <- readBin(bytes, numeric(), length(bytes) %/% 4, 4, endian = "little") + ifelse(floats == 0, NA_real_, floats) + } + + setDT(dat) + dat[, c("rawdata") := lapply(get("rawdata"), hexstring_to_float)] # TODO: remove hard coding of rawdata and derive from JSON config + dat <- dat[, .( + Offset = Offset + 60 * (0:(sapply(rawdata, length)-1)), + Val = Val, + cnt = cnt, + rawdata = unlist(rawdata), + rawdata_present = !is.na(rawdata) + ), + by = .(id, CaseID, DataID) + ] + dat[rawdata_present == FALSE, rawdata := Val] # Fix measurements that only have one + dat[, rawdata_present := NULL] + print(dat) + dat +} \ No newline at end of file diff --git a/R/config-utils.R b/R/config-utils.R index ca9e6a49..bdada7db 100644 --- a/R/config-utils.R +++ b/R/config-utils.R @@ -386,6 +386,34 @@ partition_col <- function(x, orig_names = FALSE) { col } + +tbl_callback <- function(x){ + x <- as_tbl_cfg(x) + assert_that(length(x) == 1L) + + + if (!("callback" %in% vctrs::fields(x))) { + return(identity_callback) + } + + callback_field <- vctrs::field(x, "callback") + if (is.character(callback_field)) { + return(str_to_fun(callback_field)) + } + + if (!is.null(callback_field) && !is.list(callback_field)) { + return(identity_callback) + } + + callback_value <- callback_field[[1]] + if (is.character(callback_value)) { + return(str_to_fun(callback_field[1])) + } + + return(identity_callback) + } + + #' @export n_tick.tbl_cfg <- function(x) { diff --git a/R/setup-import.R b/R/setup-import.R index 0d2e536d..ead910f6 100644 --- a/R/setup-import.R +++ b/R/setup-import.R @@ -184,6 +184,9 @@ import_tbl.tbl_cfg <- function(x, data_dir = src_data_dir(x), progress = NULL, assert_that(is.dir(data_dir), is.flag(cleanup)) + # Print number of parts + print(paste("[import_tbl] Import table ", tbl_name(x))) + print(paste("[import_tbl] Number of parts: ", n_part(x))) if (n_part(x) > 1L) { partition_table(x, data_dir, progress, ...) } else { @@ -257,6 +260,8 @@ partition_table <- function(x, dir, progress = NULL, chunk_length = 10 ^ 7, file <- file.path(dir, rawf) name <- tbl_name(x) + callback <- tbl_callback(x) + exp_row <- n_row(x) if (is.na(exp_row)) { @@ -267,17 +272,19 @@ partition_table <- function(x, dir, progress = NULL, chunk_length = 10 ^ 7, if (length(file) == 1L) { - callback <- function(x, pos, ...) { - report_problems(x, rawf) - split_write(x, pfun, tempdir, ((pos - 1L) / chunk_length) + 1L, - progress, name, tick) - } + process_chunk <- function(x, pos, ...) { + report_problems(x, rawf) + split_write(callback(x), pfun, tempdir, ((pos - 1L) / chunk_length) + 1L, + progress, name, tick) + } if (grepl("\\.gz$", file)) { + print("[partition_table] gunzipping") file <- gunzip(file, tempdir) } - readr::read_csv_chunked(file, callback, chunk_length, col_types = spec, + print(paste("[partition_table] reading csv chunked with chunk_length: ", chunk_length)) + readr::read_csv_chunked(file, process_chunk, chunk_length, col_types = spec, progress = FALSE, ...) if (is.na(exp_row)) { @@ -291,7 +298,7 @@ partition_table <- function(x, dir, progress = NULL, chunk_length = 10 ^ 7, dat <- readr::read_csv(file[i], col_types = spec, progress = FALSE, ...) report_problems(dat, rawf[i]) - split_write(dat, pfun, tempdir, i, progress, name, tick) + split_write(callback(data), pfun, tempdir, i, progress, name, tick) } } @@ -355,6 +362,7 @@ csv_to_fst <- function(x, dir, progress = NULL, ...) { raw <- raw_file_name(x) src <- file.path(dir, raw) dst <- file.path(dir, fst_file_name(x)) + callback <- tbl_callback(x) assert_that(length(x) == 1L, length(src) == 1L, length(dst) == 1L) @@ -364,6 +372,7 @@ csv_to_fst <- function(x, dir, progress = NULL, ...) { report_problems(dat, raw) + dat <- callback(dat) dat <- rename_cols(setDT(dat), ricu_cols(x), orig_cols(x)) fst::write_fst(dat, dst, compress = 100L) @@ -422,24 +431,24 @@ report_problems <- function(x, file) { invisible(NULL) } -report_problems <- function(x, file) { +# report_problems <- function(x, file) { - prob_to_str <- function(x) { - paste0("[", x[1L], ", ", x[2L], "]: got '", x[4L], "' instead of ", x[3L]) - } +# prob_to_str <- function(x) { +# paste0("[", x[1L], ", ", x[2L], "]: got '", x[4L], "' instead of ", x[3L]) +# } - probs <- readr::problems(x) +# probs <- readr::problems(x) - if (nrow(probs)) { +# if (nrow(probs)) { - probs <- bullet(apply(probs, 1L, prob_to_str)) +# probs <- bullet(apply(probs, 1L, prob_to_str)) - warn_ricu( - c("Encountered parsing problems for file {basename(file)}:", probs), - class = "csv_parsing_error", indent = c(0L, rep_along(2L, probs)), - exdent = c(0L, rep_along(2L, probs)) - ) - } +# warn_ricu( +# c("Encountered parsing problems for file {basename(file)}:", probs), +# class = "csv_parsing_error", indent = c(0L, rep_along(2L, probs)), +# exdent = c(0L, rep_along(2L, probs)) +# ) +# } - invisible(NULL) -} +# invisible(NULL) +# } From 31d48f7550c37e9e0d7e4da23104d496100f5830 Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Wed, 20 Mar 2024 20:42:21 +0100 Subject: [PATCH 21/38] Remove print --- R/callback-tbl.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/callback-tbl.R b/R/callback-tbl.R index fc31a6c9..b506c8ec 100644 --- a/R/callback-tbl.R +++ b/R/callback-tbl.R @@ -23,6 +23,6 @@ sic_data_float_h <- function(dat, ...) { ] dat[rawdata_present == FALSE, rawdata := Val] # Fix measurements that only have one dat[, rawdata_present := NULL] - print(dat) - dat + + return(dat) } \ No newline at end of file From eb41aaa386f1ce2496e4216c629e891991c7db0a Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Wed, 20 Mar 2024 21:04:39 +0100 Subject: [PATCH 22/38] Add utility functions proposed by `prockenschaub` here: https://github.com/eth-mds/ricu/pull/30/files --- R/concept-utils.R | 51 ++++++++++++++++++++++++++++++++++++++++++ R/data-utils.R | 57 ++++++++++++++++++++++++++++++++--------------- 2 files changed, 90 insertions(+), 18 deletions(-) diff --git a/R/concept-utils.R b/R/concept-utils.R index 47f7d96b..d0e22d68 100644 --- a/R/concept-utils.R +++ b/R/concept-utils.R @@ -199,6 +199,29 @@ get_hirid_ids <- function(x, ids) { load_id("variables", x, .data$id %in% .env$ids, cols = "unit", id_var = "id") } +#' @rdname data_items + #' @export + init_itm.sic_itm <- function(x, table, sub_var, ids, + callback = "identity_callback", ...) { + + assert_that(is.string(table), has_length(ids), + is.character(ids) || is_intish(ids)) + + x[["table"]] <- table + + units <- get_sic_ids(x, ids) + units <- rename_cols(rm_na(units), sub_var, "referenceglobalid") + + todo <- c("ids", "units") + x[todo] <- mget(todo) + + complete_tbl_itm(x, callback, sub_var, ...) + } + + get_sic_ids <- function(x, ids) { + load_id("d_references", x, .data$referenceglobalid %in% .env$ids, cols = "referenceunit", id_var = "referenceglobalid") + } + #' @param unit_val String valued unit to be used in case no `unit_var` is #' available for the given table #' @@ -330,6 +353,10 @@ prepare_query.sel_itm <- prep_sel #' @export prepare_query.hrd_itm <- prep_sel +#' @keywords internal +#' @export +prepare_query.sic_itm <- prep_sel + #' @keywords internal #' @export prepare_query.rgx_itm <- function(x) { @@ -546,6 +573,17 @@ do_callback.hrd_itm <- function(x, ...) { NextMethod() } +#' @keywords internal + #' @export + do_callback.sic_itm <- function(x, ...) { + # TODO: generalise and combine with do_callback.hrd_itm + if (is.null(get_itm_var(x, "unit_var"))) { + x <- try_add_vars(x, unit_var = "referenceunit") + } + + NextMethod() +} + #' @keywords internal #' @export do_callback.col_itm <- function(x, ...) { @@ -604,6 +642,19 @@ do_itm_load.hrd_itm <- function(x, id_type = "icustay", interval = hours(1L)) { res } +#' @export + do_itm_load.sic_itm <- function(x, id_type = "icustay", interval = hours(1L)) { + + res <- NextMethod() + + if (is.null(get_itm_var(x, "unit_var"))) { + unt <- x[["units"]] + res <- merge(res, unt, by = get_itm_var(x, "sub_var"), all.x = TRUE) + } + + res +} + #' @export do_itm_load.col_itm <- function(x, id_type = "icustay", interval = hours(1L)) { diff --git a/R/data-utils.R b/R/data-utils.R index 31fa02df..d3941c0f 100644 --- a/R/data-utils.R +++ b/R/data-utils.R @@ -128,6 +128,28 @@ id_orig_helper.miiv_env <- function(x, id) { as_id_tbl(res, id, by_ref = TRUE) } +#' @rdname data_utils +#' @export +id_orig_helper.sic_env <- function(x, id) { + + if (!identical(id, "patientid")) { + return(NextMethod()) + } + + cfg <- as_id_cfg(x)[id == id_var_opts(x)] + + assert_that(length(cfg) == 1L) + + sta <- field(cfg, "start") + age <- "admissionyear" + + res <- as_src_tbl(x, field(cfg, "table")) + res <- res[, c(id, sta, age)] + res <- res[, c(sta, age) := shift_year(get(sta), get(age))] + + as_id_tbl(res, id, by_ref = TRUE) +} + #' @export id_orig_helper.default <- function(x, ...) stop_generic(x, .Generic) @@ -228,33 +250,32 @@ id_win_helper.eicu_env <- function(x) { order_rename(res, ids, sta, end) } -#' @rdname data_utils -#' @export -id_win_helper.sic_env <- function(x) { - - sec_as_mins <- function(x) min_as_mins(as.integer(x / 60)) - + + #' @rdname data_utils + #' @export + id_win_helper.sic_env <- function(x) { cfg <- sort(as_id_cfg(x), decreasing = TRUE) - + ids <- field(cfg, "id") - sta <- c(unique(field(cfg, "start")), "HospAdmTime") + sta <- field(cfg, "start") end <- field(cfg, "end") - + tbl <- as_src_tbl(x, unique(field(cfg, "table"))) - + mis <- setdiff(sta, colnames(tbl)) - + res <- load_src(tbl, cols = c(ids, intersect(sta, colnames(tbl)), end)) - - if (length(mis) > 0L) { - res[, c(mis) := 0L] - } - - res <- res[, c(sta, end) := lapply(.SD, sec_as_mins), .SDcols = c(sta, end)] + + assert_that(length(mis) == 1L) + res[, firstadmission := 0L] + + res <- res[, c(sta, end) := lapply(.SD, s_as_mins), .SDcols = c(sta, end)] + res[, timeofstay := offsetafterfirstadmission + timeofstay] + res <- setcolorder(res, c(ids, sta, end)) res <- rename_cols(res, c(ids, paste0(ids, "_start"), paste0(ids, "_end")), by_ref = TRUE) - + as_id_tbl(res, ids[2L], by_ref = TRUE) } From e4c930aed683ff9eb099f5ccdfd26b7df4f6d36c Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Mon, 25 Mar 2024 08:42:12 +0100 Subject: [PATCH 23/38] Fix configs for `sic` based on `prockenschaub` --- .../config/concept-dict/blood_gas.json | 12 +++-- .../config/concept-dict/chemistry.json | 54 +++++++++++-------- .../config/concept-dict/demographics.json | 2 +- .../config/concept-dict/hematology.json | 10 +++- .../config/concept-dict/medications.json | 42 +++++---------- inst/extdata/config/concept-dict/vitals.json | 5 +- 6 files changed, 66 insertions(+), 59 deletions(-) diff --git a/inst/extdata/config/concept-dict/blood_gas.json b/inst/extdata/config/concept-dict/blood_gas.json index 04b7854d..16c003ee 100644 --- a/inst/extdata/config/concept-dict/blood_gas.json +++ b/inst/extdata/config/concept-dict/blood_gas.json @@ -67,9 +67,10 @@ ], "sic": [ { - "ids": 668, + "ids": [668, 449], "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -150,10 +151,11 @@ ], "sic": [ { - "ids": 655, + "ids": [655, 452], "table": "laboratory", - "sub_var": "LaboratoryID" - } + "sub_var": "laboratoryid", + "class": "sic_itm" + }, ], "picdb": [] } diff --git a/inst/extdata/config/concept-dict/chemistry.json b/inst/extdata/config/concept-dict/chemistry.json index bae0b17c..763617a3 100644 --- a/inst/extdata/config/concept-dict/chemistry.json +++ b/inst/extdata/config/concept-dict/chemistry.json @@ -66,7 +66,8 @@ { "ids": 287, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -147,7 +148,8 @@ { "ids": 609, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -228,7 +230,8 @@ { "ids": 617, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [] @@ -301,7 +304,8 @@ { "ids": 616, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [] @@ -375,7 +379,8 @@ { "ids": 456, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -456,7 +461,8 @@ { "ids": 333, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [] @@ -526,7 +532,8 @@ { "ids": 332, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [] @@ -595,14 +602,6 @@ "sub_var": "itemid" } ], - "sic": [ - { - "ids": 355, - "table": "laboratory", - "sub_var": "LaboratoryID", - "callback": "transform_fun(binary_op(`*`, 2.14))" - } - ], "picdb": [] } }, @@ -675,8 +674,9 @@ { "ids": 457, "table": "laboratory", - "sub_var": "LaboratoryID", - "callback": "transform_fun(binary_op(`*`, 4.008))" + "sub_var": "laboratoryid", + "callback": "convert_unit(binary_op(`*`, 4.008), 'mg/dL', 'mmol/l')", + "class": "sic_itm" } ], "picdb": [] @@ -749,7 +749,8 @@ { "ids": 611, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [] @@ -816,7 +817,8 @@ { "ids": 253, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [] @@ -893,7 +895,8 @@ 450 ], "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [] @@ -967,7 +970,8 @@ { "ids": 367, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -1048,6 +1052,14 @@ "callback": "convert_unit(binary_op(`*`, 10), 'mg/L', 'mg/dl')" } ], + "sic": [ + { + "ids": 341, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } + ], "picdb": [ { "table": "labevents", diff --git a/inst/extdata/config/concept-dict/demographics.json b/inst/extdata/config/concept-dict/demographics.json index 6f156f78..1e54fa3a 100644 --- a/inst/extdata/config/concept-dict/demographics.json +++ b/inst/extdata/config/concept-dict/demographics.json @@ -127,7 +127,7 @@ "sic": [ { "table": "cases", - "val_var": "AgeOnAdmission", + "val_var": "ageonadmission", "class": "col_itm" } ], diff --git a/inst/extdata/config/concept-dict/hematology.json b/inst/extdata/config/concept-dict/hematology.json index 4cd8d094..60dee64f 100644 --- a/inst/extdata/config/concept-dict/hematology.json +++ b/inst/extdata/config/concept-dict/hematology.json @@ -59,7 +59,15 @@ { "ids": 174, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" + }, + { + "ids": 295, + "table": "laboratory", + "sub_var": "laboratoryid", + "callback": "blood_cell_ratio", + "class": "sic_itm" } ], "picdb": [ diff --git a/inst/extdata/config/concept-dict/medications.json b/inst/extdata/config/concept-dict/medications.json index 3ad798e8..fbea781c 100644 --- a/inst/extdata/config/concept-dict/medications.json +++ b/inst/extdata/config/concept-dict/medications.json @@ -408,35 +408,11 @@ } ], "sic": [ - { - "ids": [ - 1406, - 1408, - 1410, - 1418, - 1421, - 1422, - 1423, - 1428, - 1431, - 1433, - 1436, - 1449, - 1454, - 1457, - 1458, - 1459, - 1460, - 1461, - 1603, - 1795, - 1913, - 1927 - ], - "table": "medication", - "sub_var": "DrugID", - "callback": "transform_fun(set_val(TRUE))" - } + { + "ids": [1401, 1406, 1408, 1410, 1418, 1421, 1422, 1423, 1428, 1431, 1433, 1436, 1439, 1446, 1449, 1451, 1454, 1455, 1456, 1457, 1458, 1459, 1460, 1461, 1462, 1577, 1603, 1628, 1605, 1997, 1693, 1606, 1813, 1913, 1927, 1819], + "table": "medication", + "sub_var": "drugid" + } ], "picdb": [] } @@ -580,6 +556,14 @@ "callback": "transform_fun(set_val(TRUE))" } ], + "sic": [ + { + "ids": [1397, 1506, 1524, 1525, 1751, 1977], + "table": "medication", + "sub_var": "drugid", + "callback": "transform_fun(set_val(TRUE))" + } + ], "picdb": [] } }, diff --git a/inst/extdata/config/concept-dict/vitals.json b/inst/extdata/config/concept-dict/vitals.json index 91dfcc08..9230ed0c 100644 --- a/inst/extdata/config/concept-dict/vitals.json +++ b/inst/extdata/config/concept-dict/vitals.json @@ -82,7 +82,8 @@ 705 ], "table": "data_float_h", - "sub_var": "DataID" + "sub_var": "dataid", + "class": "sic_itm" } ], "picdb": [ @@ -560,7 +561,7 @@ { "table": "chartevents", "ids": [ - "1001" + 1001 ], "sub_var": "itemid" }, From ee483632c916e37f8831be5f8836f4e281048e48 Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Mon, 25 Mar 2024 09:16:33 +0100 Subject: [PATCH 24/38] Fix `sic` configs based on https://github.com/prockenschaub/ricu-package/tree/sicdb --- .../config/concept-dict/blood_gas.json | 42 +++++- .../config/concept-dict/chemistry.json | 42 +++--- .../config/concept-dict/demographics.json | 18 +-- .../config/concept-dict/hematology.json | 108 ++++++++++------ .../config/concept-dict/medications.json | 120 +++++++++--------- inst/extdata/config/concept-dict/outcome.json | 18 +++ inst/extdata/config/concept-dict/output.json | 3 +- .../config/concept-dict/respiratory.json | 6 +- inst/extdata/config/concept-dict/vitals.json | 24 +++- 9 files changed, 243 insertions(+), 138 deletions(-) diff --git a/inst/extdata/config/concept-dict/blood_gas.json b/inst/extdata/config/concept-dict/blood_gas.json index 935db222..0138565e 100644 --- a/inst/extdata/config/concept-dict/blood_gas.json +++ b/inst/extdata/config/concept-dict/blood_gas.json @@ -263,7 +263,14 @@ { "ids": 2283, "table": "data_float_h", - "sub_var": "DataID" + "sub_var": "dataid", + "class": "sic_itm" + }, + { + "ids": 684, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -310,6 +317,14 @@ "class": "hrd_itm" } ], + "sic": [ + { + "ids": [196, 660], + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } + ], "picdb": [ { "table": "labevents", @@ -387,10 +402,12 @@ { "ids": [ 657, + 465, 454 ], "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -462,6 +479,14 @@ "sub_var": "itemid" } ], + "sic": [ + { + "ids": 661, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } + ], "picdb": [ { "table": "labevents", @@ -540,9 +565,10 @@ ], "sic": [ { - "ids": 687, - "table": "laboratory", - "sub_var": "LaboratoryID" + "ids": 687, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -621,7 +647,8 @@ { "ids": 688, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -708,7 +735,8 @@ 689 ], "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ diff --git a/inst/extdata/config/concept-dict/chemistry.json b/inst/extdata/config/concept-dict/chemistry.json index 472be88b..39fbd784 100644 --- a/inst/extdata/config/concept-dict/chemistry.json +++ b/inst/extdata/config/concept-dict/chemistry.json @@ -1242,7 +1242,8 @@ 656 ], "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -1331,12 +1332,10 @@ ], "sic": [ { - "ids": [ - 463, - 685 - ], - "table": "laboratory", - "sub_var": "LaboratoryID" + "ids": [463, 685], + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -1417,10 +1416,11 @@ ], "sic": [ { - "ids": 468, - "table": "laboratory", - "sub_var": "LaboratoryID", - "callback": "transform_fun(binary_op(`*`, 2.431))" + "ids": 468, + "table": "laboratory", + "sub_var": "laboratoryid", + "callback": "convert_unit(binary_op(`*`, 2.431), 'mg/dL')", + "class": "sic_itm" } ], "picdb": [ @@ -1514,7 +1514,8 @@ 686 ], "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -1593,10 +1594,11 @@ ], "sic": [ { - "ids": 471, - "table": "laboratory", - "sub_var": "LaboratoryID", - "callback": "transform_fun(binary_op(`*`, 3.097521))" + "ids": 471, + "table": "laboratory", + "sub_var": "laboratoryid", + "callback": "convert_unit(binary_op(`*`, 3.097521), 'mg/dL')", + "class": "sic_itm" } ], "picdb": [ @@ -1673,8 +1675,9 @@ { "ids": 481, "table": "laboratory", - "sub_var": "LaboratoryID", - "callback": "transform_fun(binary_op(`/`, 1000))" + "sub_var": "laboratoryid", + "callback": "transform_fun(binary_op(`/`, 1000))", + "class": "sic_itm" } ], "picdb": [ @@ -1744,7 +1747,8 @@ { "ids": 270, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ diff --git a/inst/extdata/config/concept-dict/demographics.json b/inst/extdata/config/concept-dict/demographics.json index 59d944a1..a0f7da0d 100644 --- a/inst/extdata/config/concept-dict/demographics.json +++ b/inst/extdata/config/concept-dict/demographics.json @@ -235,7 +235,7 @@ "sic": [ { "table": "cases", - "val_var": "HeightOnAdmission", + "val_var": "heightonadmission", "class": "col_itm" } ], @@ -317,10 +317,10 @@ ], "sic": [ { - "table": "cases", - "val_var": "Sex", - "callback": "apply_map(c(`735` = 'Male', `736` = 'Female'))", - "class": "col_itm" + "table": "cases", + "val_var": "sex", + "class": "col_itm", + "callback": "transform_fun(sic_sex)" } ], "picdb": [ @@ -403,10 +403,10 @@ ], "sic": [ { - "table": "cases", - "val_var": "WeightOnAdmission", - "class": "col_itm", - "callback": "transform_fun(binary_op(`/`, 1000))" + "table": "cases", + "val_var": "weightonadmission", + "class": "col_itm", + "callback": "transform_fun(binary_op(`/`, 1000))" } ], "picdb": [ diff --git a/inst/extdata/config/concept-dict/hematology.json b/inst/extdata/config/concept-dict/hematology.json index 60dee64f..3d9b5ef5 100644 --- a/inst/extdata/config/concept-dict/hematology.json +++ b/inst/extdata/config/concept-dict/hematology.json @@ -200,11 +200,19 @@ } ], "sic": [ - { - "ids": 197, - "table": "laboratory", - "sub_var": "LaboratoryID" - } + { + "ids": 197, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + }, + { + "ids": 299, + "table": "laboratory", + "sub_var": "laboratoryid", + "callback": "blood_cell_ratio", + "class": "sic_itm" + } ], "picdb": [ { @@ -335,11 +343,12 @@ } ], "sic": [ - { - "ids": 344, - "table": "laboratory", - "sub_var": "LaboratoryID" - } + { + "ids": 344, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } ], "picdb": [] } @@ -381,6 +390,14 @@ "sub_var": "itemid" } ], + "sic": [ + { + "ids": 214, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } + ], "picdb": [] } }, @@ -446,7 +463,8 @@ 682 ], "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -538,7 +556,8 @@ 289 ], "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -681,6 +700,13 @@ "ids": 223, "table": "laboratory", "sub_var": "LaboratoryID" + }, + { + "ids": 302, + "table": "laboratory", + "sub_var": "laboratoryid", + "callback": "blood_cell_ratio", + "class": "sic_itm" } ], "picdb": [] @@ -748,7 +774,8 @@ { "ids": 566, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -824,9 +851,11 @@ ], "sic": [ { - "ids": 290, - "table": "laboratory", - "sub_var": "LaboratoryID" + "ids": 290, + "table": "laboratory", + "sub_var": "laboratoryid", + "callback": "convert_unit(binary_op(`*`, 0.16114), '%')", + "class": "sic_itm" } ], "picdb": [ @@ -902,7 +931,8 @@ { "ids": 286, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -985,9 +1015,17 @@ ], "sic": [ { - "ids": 230, - "table": "laboratory", - "sub_var": "LaboratoryID" + "ids": 230, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + }, + { + "ids": 308, + "table": "laboratory", + "sub_var": "laboratoryid", + "callback": "blood_cell_ratio", + "class": "sic_itm" } ], "picdb": [ @@ -1069,9 +1107,10 @@ ], "sic": [ { - "ids": 314, - "table": "laboratory", - "sub_var": "LaboratoryID" + "ids": 314, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -1138,7 +1177,8 @@ { "ids": 598, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -1214,7 +1254,8 @@ { "ids": 597, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -1283,9 +1324,10 @@ ], "sic": [ { - "ids": 599, - "table": "laboratory", - "sub_var": "LaboratoryID" + "ids": 599, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ @@ -1351,13 +1393,6 @@ "sub_var": "itemid" } ], - "sic": [ - { - "ids": 3319, - "table": "laboratory", - "sub_var": "LaboratoryID" - } - ], "picdb": [ { "table": "labevents", @@ -1436,7 +1471,8 @@ { "ids": 301, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" } ], "picdb": [ diff --git a/inst/extdata/config/concept-dict/medications.json b/inst/extdata/config/concept-dict/medications.json index fbea781c..770deb8f 100644 --- a/inst/extdata/config/concept-dict/medications.json +++ b/inst/extdata/config/concept-dict/medications.json @@ -826,9 +826,10 @@ 1559 ], "table": "medication", - "sub_var": "DrugID", - "stop_var": "OffsetDrugEnd", - "callback": "sic_dur" + "sub_var": "drugid", + "stop_var": "offsetdrugend", + "grp_var": "id", + "callback": "default_duration" } ], "picdb": [] @@ -931,16 +932,17 @@ "stop_var": "endtime", "callback": "mimic_rate_mv" } - ], + ], "sic": [ - { - "ids": 1559, - "table": "medication", - "sub_var": "DrugID", - "val_var": "AmountPerMinute", - "stop_var": "OffsetDrugEnd", - "callback": "sic_rate_kg" - } + { + "ids": 1559, + "table": "medication", + "sub_var": "drugid", + "val_var": "amountperminute", + "stop_var": "offsetdrugend", + "class": "sic_itm", + "callback": "sic_rate_kg" + } ], "picdb": [] } @@ -1042,13 +1044,14 @@ } ], "sic": [ - { - "ids": 1618, - "table": "medication", - "sub_var": "DrugID", - "stop_var": "OffsetDrugEnd", - "callback": "sic_dur" - } + { + "ids": 1618, + "table": "medication", + "sub_var": "drugid", + "stop_var": "offsetdrugend", + "grp_var": "id", + "callback": "default_duration" + } ], "picdb": [] } @@ -1143,16 +1146,17 @@ "stop_var": "endtime", "callback": "mimic_rate_mv" } - ], + ], "sic": [ - { - "ids": 1618, - "table": "medication", - "sub_var": "DrugID", - "val_var": "AmountPerMinute", - "stop_var": "OffsetDrugEnd", - "callback": "sic_rate_kg" - } + { + "ids": 1618, + "table": "medication", + "sub_var": "drugid", + "val_var": "amountperminute", + "stop_var": "offsetdrugend", + "class": "sic_itm", + "callback": "sic_rate_kg" + } ], "picdb": [] } @@ -1258,15 +1262,14 @@ } ], "sic": [ - { - "ids": [ - 1502 - ], - "table": "medication", - "sub_var": "DrugID", - "stop_var": "OffsetDrugEnd", - "callback": "sic_dur" - } + { + "ids": 1502, + "table": "medication", + "sub_var": "drugid", + "stop_var": "offsetdrugend", + "grp_var": "id", + "callback": "default_duration" + } ], "picdb": [] } @@ -1378,14 +1381,15 @@ } ], "sic": [ - { - "ids": 1502, - "table": "medication", - "sub_var": "DrugID", - "val_var": "AmountPerMinute", - "stop_var": "OffsetDrugEnd", - "callback": "sic_rate_kg" - } + { + "ids": 1502, + "table": "medication", + "sub_var": "drugid", + "val_var": "amountperminute", + "stop_var": "offsetdrugend", + "class": "sic_itm", + "callback": "sic_rate_kg" + } ], "picdb": [] } @@ -1600,13 +1604,12 @@ ], "sic": [ { - "ids": [ - 1562 - ], - "table": "medication", - "sub_var": "DrugID", - "stop_var": "OffsetDrugEnd", - "callback": "sic_dur" + "ids": 1562, + "table": "medication", + "sub_var": "drugid", + "stop_var": "offsetdrugend", + "grp_var": "id", + "callback": "default_duration" } ], "picdb": [] @@ -1730,12 +1733,13 @@ ], "sic": [ { - "ids": 1562, - "table": "medication", - "sub_var": "DrugID", - "val_var": "AmountPerMinute", - "stop_var": "OffsetDrugEnd", - "callback": "sic_rate_kg" + "ids": 1562, + "table": "medication", + "sub_var": "drugid", + "val_var": "amountperminute", + "stop_var": "offsetdrugend", + "class": "sic_itm", + "callback": "sic_rate_kg" } ], "picdb": [] diff --git a/inst/extdata/config/concept-dict/outcome.json b/inst/extdata/config/concept-dict/outcome.json index 0d08a873..95009c78 100644 --- a/inst/extdata/config/concept-dict/outcome.json +++ b/inst/extdata/config/concept-dict/outcome.json @@ -79,6 +79,16 @@ "callback": "transform_fun(comp_na(`==`, 1L))", "class": "col_itm" } + ], + "sic": [ + { + "table": "cases", + "index_var": "offsetofdeath", + "adm_time": "offsetafterfirstadmission", + "val_var": "timeofstay", + "callback": "sic_death", + "class": "col_itm" + } ] } }, @@ -206,6 +216,14 @@ "class": "fun_itm" } ], + "sic": [ + { + "table": "cases", + "val_var": "timeofstay", + "callback": "transform_fun(binary_op(`/`, 60 * 60 * 24))", + "class": "col_itm" + } + ], "picdb": [] } }, diff --git a/inst/extdata/config/concept-dict/output.json b/inst/extdata/config/concept-dict/output.json index 1d3796ae..e078d7d7 100644 --- a/inst/extdata/config/concept-dict/output.json +++ b/inst/extdata/config/concept-dict/output.json @@ -148,7 +148,8 @@ { "ids": 725, "table": "data_float_h", - "sub_var": "DataID" + "sub_var": "dataid", + "class": "sic_itm" } ], "picdb": [ diff --git a/inst/extdata/config/concept-dict/respiratory.json b/inst/extdata/config/concept-dict/respiratory.json index d71d24ef..234531a9 100644 --- a/inst/extdata/config/concept-dict/respiratory.json +++ b/inst/extdata/config/concept-dict/respiratory.json @@ -223,7 +223,8 @@ { "ids": 710, "table": "data_float_h", - "sub_var": "DataID" + "sub_var": "dataid", + "class": "sic_itm" } ], "picdb": [ @@ -349,7 +350,8 @@ { "ids": 719, "table": "data_float_h", - "sub_var": "DataID" + "sub_var": "dataid", + "class": "sic_itm" } ], "picdb": [ diff --git a/inst/extdata/config/concept-dict/vitals.json b/inst/extdata/config/concept-dict/vitals.json index 6c8b37a9..bac04180 100644 --- a/inst/extdata/config/concept-dict/vitals.json +++ b/inst/extdata/config/concept-dict/vitals.json @@ -162,6 +162,14 @@ "sub_var": "itemid" } ], + "sic": [ + { + "ids": 716, + "table": "laboratory", + "sub_var": "laboratoryid", + "class": "sic_itm" + } + ], "picdb": [ ] } @@ -235,9 +243,10 @@ ], "sic": [ { - "ids": 708, - "table": "data_float_h", - "sub_var": "DataID" + "ids": [708, 724], + "table": "data_float_h", + "sub_var": "dataid", + "class": "sic_itm" } ], "picdb": [ @@ -352,7 +361,8 @@ 706 ], "table": "data_float_h", - "sub_var": "DataID" + "sub_var": "dataid", + "class": "sic_itm" } ], "picdb": [ @@ -442,7 +452,8 @@ 704 ], "table": "data_float_h", - "sub_var": "DataID" + "sub_var": "dataid", + "class": "sic_itm" } ], "picdb": [ @@ -568,7 +579,8 @@ { "ids": 709, "table": "data_float_h", - "sub_var": "DataID" + "sub_var": "dataid", + "class": "sic_itm" } ], "picdb": [ From c2b8c4961fcb5a5f8633ce0ace26b2a40ea5f480 Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Mon, 25 Mar 2024 09:39:12 +0100 Subject: [PATCH 25/38] Remove prints and use ricu msg --- R/setup-import.R | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/R/setup-import.R b/R/setup-import.R index 2b501803..0673290b 100644 --- a/R/setup-import.R +++ b/R/setup-import.R @@ -184,9 +184,8 @@ import_tbl.tbl_cfg <- function(x, data_dir = src_data_dir(x), progress = NULL, assert_that(is.dir(data_dir), is.flag(cleanup)) - # Print number of parts - print(paste("[import_tbl] Import table ", tbl_name(x))) - print(paste("[import_tbl] Number of parts: ", n_part(x))) + msg_ricu(paste("[import_tbl] Import table ", tbl_name(x))) + msg_ricu(paste("[import_tbl] Number of parts: ", n_part(x))) if (n_part(x) > 1L) { partition_table(x, data_dir, progress, ...) } else { @@ -279,11 +278,10 @@ partition_table <- function(x, dir, progress = NULL, chunk_length = 10 ^ 7, } if (grepl("\\.gz$", file)) { - print("[partition_table] gunzipping") + msg_ricu(paste("[partition_table] gunzip: ", file)) file <- gunzip(file, tempdir) } - print(paste("[partition_table] reading csv chunked with chunk_length: ", chunk_length)) readr::read_csv_chunked(file, process_chunk, chunk_length, col_types = spec, progress = FALSE, ...) From e5061d4915ea5113fc27f8d25cf744de2d284d97 Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Mon, 25 Mar 2024 11:58:38 +0100 Subject: [PATCH 26/38] Remove redundant `report_probolems` --- R/setup-import.R | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/R/setup-import.R b/R/setup-import.R index 0673290b..5af6f8dc 100644 --- a/R/setup-import.R +++ b/R/setup-import.R @@ -426,25 +426,3 @@ report_problems <- function(x, file) { invisible(NULL) } - -# report_problems <- function(x, file) { - -# prob_to_str <- function(x) { -# paste0("[", x[1L], ", ", x[2L], "]: got '", x[4L], "' instead of ", x[3L]) -# } - -# probs <- readr::problems(x) - -# if (nrow(probs)) { - -# probs <- bullet(apply(probs, 1L, prob_to_str)) - -# warn_ricu( -# c("Encountered parsing problems for file {basename(file)}:", probs), -# class = "csv_parsing_error", indent = c(0L, rep_along(2L, probs)), -# exdent = c(0L, rep_along(2L, probs)) -# ) -# } - -# invisible(NULL) -# } From 2c8d76397c60191c40c286757b7b5c83e57dfe31 Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Mon, 25 Mar 2024 14:00:28 +0100 Subject: [PATCH 27/38] Add prints and tempdir arg --- R/setup-import.R | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/R/setup-import.R b/R/setup-import.R index ead910f6..5bb57b46 100644 --- a/R/setup-import.R +++ b/R/setup-import.R @@ -221,16 +221,21 @@ merge_fst_chunks <- function(src, targ, new, old, sort_col, prog, nme, tick) { fst::write_fst(dat, new_file, compress = 100L) - progress_tick(paste(nme, "part", part_no), prog, - coalesce(tick, floor(nrow(dat) / 2))) + # progress_tick(paste(nme, "part", part_no), prog, + # coalesce(tick, floor(nrow(dat) / 2))) invisible(NULL) } -split_write <- function(x, part_fun, dir, chunk_no, prog, nme, tick) { +split_write <- function(x, part_fun, dir, chunk_no, prog, nme, tick, callback = NULL) { n_row <- nrow(x) + if (!is.null(callback)) { + print("[split_write] apply callback") + x <- callback(x) + } + x <- split(x, part_fun(x)) tmp_nme <- file.path(dir, paste0("part_", names(x)), @@ -241,16 +246,21 @@ split_write <- function(x, part_fun, dir, chunk_no, prog, nme, tick) { Map(fst::write_fst, x, tmp_nme) - progress_tick(paste(nme, "chunk", chunk_no), prog, - coalesce(tick, floor(n_row / 2))) + # progress_tick(paste(nme, "chunk", chunk_no), prog, + # coalesce(tick, floor(n_row / 2))) invisible(NULL) } -partition_table <- function(x, dir, progress = NULL, chunk_length = 10 ^ 7, +partition_table <- function(x, dir, progress = NULL, chunk_length = 10 ^ 7, tempdir = NULL, ...) { - tempdir <- ensure_dirs(tempfile()) + # tempdir <- ensure_dirs(tempfile()) + if (is.null(tempdir)) { + # tempdir <- ensure_dirs(file.path(dir, "tempdir")) + tempdir <- ensure_dirs(tempfile()) + } + print(paste("[partition_table] tempdir: ", tempdir)) on.exit(unlink(tempdir, recursive = TRUE)) spec <- col_spec(x) @@ -274,8 +284,8 @@ partition_table <- function(x, dir, progress = NULL, chunk_length = 10 ^ 7, process_chunk <- function(x, pos, ...) { report_problems(x, rawf) - split_write(callback(x), pfun, tempdir, ((pos - 1L) / chunk_length) + 1L, - progress, name, tick) + split_write(x, pfun, tempdir, ((pos - 1L) / chunk_length) + 1L, + progress, name, tick, callback) } if (grepl("\\.gz$", file)) { @@ -310,6 +320,7 @@ partition_table <- function(x, dir, progress = NULL, chunk_length = 10 ^ 7, tick <- 1L } + print(paste("[partition_table] merge_fst_chunks")) for (src_dir in file.path(tempdir, paste0("part_", seq_len(n_part(x))))) { merge_fst_chunks(src_dir, targ, newc, oldc, pcol, progress, name, tick) } @@ -357,7 +368,7 @@ gunzip <- function(file, exdir) { return(dest) } -csv_to_fst <- function(x, dir, progress = NULL, ...) { +csv_to_fst <- function(x, dir, progress = NULL, tempdir = NULL, ...) { raw <- raw_file_name(x) src <- file.path(dir, raw) From 14a14033a924b8cef35e7f9f5e27c6d56f4e30ec Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Mon, 25 Mar 2024 14:02:23 +0100 Subject: [PATCH 28/38] Cleanup prints --- R/setup-import.R | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/R/setup-import.R b/R/setup-import.R index a56a79cc..98bd69ed 100644 --- a/R/setup-import.R +++ b/R/setup-import.R @@ -230,11 +230,6 @@ split_write <- function(x, part_fun, dir, chunk_no, prog, nme, tick, callback = n_row <- nrow(x) - if (!is.null(callback)) { - print("[split_write] apply callback") - x <- callback(x) - } - x <- split(x, part_fun(x)) tmp_nme <- file.path(dir, paste0("part_", names(x)), @@ -259,7 +254,7 @@ partition_table <- function(x, dir, progress = NULL, chunk_length = 10 ^ 7, temp # tempdir <- ensure_dirs(file.path(dir, "tempdir")) tempdir <- ensure_dirs(tempfile()) } - print(paste("[partition_table] tempdir: ", tempdir)) + msg_ricu(paste("[partition_table] tempdir: ", tempdir)) on.exit(unlink(tempdir, recursive = TRUE)) spec <- col_spec(x) @@ -316,7 +311,6 @@ partition_table <- function(x, dir, progress = NULL, chunk_length = 10 ^ 7, temp tick <- 1L } - print(paste("[partition_table] merge_fst_chunks")) for (src_dir in file.path(tempdir, paste0("part_", seq_len(n_part(x))))) { merge_fst_chunks(src_dir, targ, newc, oldc, pcol, progress, name, tick) } From d96d9fe4b636103df9a98c263f4815d2ca9abd48 Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Mon, 25 Mar 2024 15:31:38 +0100 Subject: [PATCH 29/38] Fix blood_gas config --- inst/extdata/config/concept-dict/blood_gas.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/extdata/config/concept-dict/blood_gas.json b/inst/extdata/config/concept-dict/blood_gas.json index 0138565e..0c2c498d 100644 --- a/inst/extdata/config/concept-dict/blood_gas.json +++ b/inst/extdata/config/concept-dict/blood_gas.json @@ -155,7 +155,7 @@ "table": "laboratory", "sub_var": "laboratoryid", "class": "sic_itm" - }, + } ], "picdb": [ { From fa37f630cb720f5f04aef097abc93625e0900060 Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Mon, 25 Mar 2024 16:28:23 +0100 Subject: [PATCH 30/38] Fix sic table config --- inst/extdata/config/data-sources/sic.json | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/inst/extdata/config/data-sources/sic.json b/inst/extdata/config/data-sources/sic.json index 351f1ab9..5d527e36 100644 --- a/inst/extdata/config/data-sources/sic.json +++ b/inst/extdata/config/data-sources/sic.json @@ -6,14 +6,14 @@ "patient": { "id": "patientid", "position": 1, - "start": "icuoffset", + "start": "firstadmission", "end": "offsetofdeath", "table": "cases" }, "icustay": { "id": "caseid", "position": 2, - "start": "icuoffset", + "start": "offsetafterfirstadmission", "end": "timeofstay", "table": "cases" } @@ -25,10 +25,10 @@ "index_var": "icuoffset", "time_vars": [ "icuoffset", - "offsetofdeath", "heartsurgerybeginoffset", "heartsurgeryendoffset", - "offsetafterfirstadmission" + "offsetafterfirstadmission", + "offsetofdeath" ] }, "num_rows": 27386, @@ -235,7 +235,8 @@ "data_ref": { "files": "data_ref.csv.gz", "defaults": { - "index_var": "offsetafterfirstadmission" + "index_var": "offsetafterfirstadmission", + "time_vars": ["offsetafterfirstadmission"] }, "num_rows": 354157, "cols": { From 9f152c3c3f964b150e4d0e5285865d93a508be84 Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Mon, 25 Mar 2024 16:31:56 +0100 Subject: [PATCH 31/38] Use finer resolution rawdata where available --- inst/extdata/config/concept-dict/blood_gas.json | 1 + inst/extdata/config/concept-dict/output.json | 1 + inst/extdata/config/concept-dict/respiratory.json | 2 ++ inst/extdata/config/concept-dict/vitals.json | 5 +++++ 4 files changed, 9 insertions(+) diff --git a/inst/extdata/config/concept-dict/blood_gas.json b/inst/extdata/config/concept-dict/blood_gas.json index 0c2c498d..2b536a4c 100644 --- a/inst/extdata/config/concept-dict/blood_gas.json +++ b/inst/extdata/config/concept-dict/blood_gas.json @@ -263,6 +263,7 @@ { "ids": 2283, "table": "data_float_h", + "val_var": "rawdata", "sub_var": "dataid", "class": "sic_itm" }, diff --git a/inst/extdata/config/concept-dict/output.json b/inst/extdata/config/concept-dict/output.json index e078d7d7..73953d48 100644 --- a/inst/extdata/config/concept-dict/output.json +++ b/inst/extdata/config/concept-dict/output.json @@ -148,6 +148,7 @@ { "ids": 725, "table": "data_float_h", + "val_var": "rawdata", "sub_var": "dataid", "class": "sic_itm" } diff --git a/inst/extdata/config/concept-dict/respiratory.json b/inst/extdata/config/concept-dict/respiratory.json index 234531a9..5e64f8fe 100644 --- a/inst/extdata/config/concept-dict/respiratory.json +++ b/inst/extdata/config/concept-dict/respiratory.json @@ -223,6 +223,7 @@ { "ids": 710, "table": "data_float_h", + "val_var": "rawdata", "sub_var": "dataid", "class": "sic_itm" } @@ -350,6 +351,7 @@ { "ids": 719, "table": "data_float_h", + "val_var": "rawdata", "sub_var": "dataid", "class": "sic_itm" } diff --git a/inst/extdata/config/concept-dict/vitals.json b/inst/extdata/config/concept-dict/vitals.json index bac04180..395fa144 100644 --- a/inst/extdata/config/concept-dict/vitals.json +++ b/inst/extdata/config/concept-dict/vitals.json @@ -82,6 +82,7 @@ 705 ], "table": "data_float_h", + "val_var": "rawdata", "sub_var": "dataid", "class": "sic_itm" } @@ -245,6 +246,7 @@ { "ids": [708, 724], "table": "data_float_h", + "val_var": "rawdata", "sub_var": "dataid", "class": "sic_itm" } @@ -361,6 +363,7 @@ 706 ], "table": "data_float_h", + "val_var": "rawdata", "sub_var": "dataid", "class": "sic_itm" } @@ -452,6 +455,7 @@ 704 ], "table": "data_float_h", + "val_var": "rawdata", "sub_var": "dataid", "class": "sic_itm" } @@ -579,6 +583,7 @@ { "ids": 709, "table": "data_float_h", + "val_var": "rawdata", "sub_var": "dataid", "class": "sic_itm" } From 74a66d9785ba3fb78d6d9dc396942dadf748cb42 Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Mon, 25 Mar 2024 16:54:23 +0100 Subject: [PATCH 32/38] Pass tbl callback correctly --- R/config-utils.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/config-utils.R b/R/config-utils.R index 4a6dbc07..4b036943 100644 --- a/R/config-utils.R +++ b/R/config-utils.R @@ -396,6 +396,7 @@ tbl_callback <- function(x){ callback_field <- vctrs::field(x, "callback") if (is.character(callback_field)) { + msg_ricu(paste("[tbl_callback] Using callback function: ", callback_field)) return(str_to_fun(callback_field)) } @@ -405,7 +406,8 @@ tbl_callback <- function(x){ callback_value <- callback_field[[1]] if (is.character(callback_value)) { - return(str_to_fun(callback_field[1])) + msg_ricu(paste("[tbl_callback] Using callback function: ", callback_value)) + return(str_to_fun(callback_value)) } return(identity_callback) From 84ec51ff0a767429cbd647b57345169b88695a78 Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Mon, 25 Mar 2024 17:02:53 +0100 Subject: [PATCH 33/38] Fix missing callback application --- R/setup-import.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/setup-import.R b/R/setup-import.R index 98bd69ed..3cfcc487 100644 --- a/R/setup-import.R +++ b/R/setup-import.R @@ -229,7 +229,9 @@ merge_fst_chunks <- function(src, targ, new, old, sort_col, prog, nme, tick) { split_write <- function(x, part_fun, dir, chunk_no, prog, nme, tick, callback = NULL) { n_row <- nrow(x) - + if (!is.null(callback)) { + x <- callback(x) + } x <- split(x, part_fun(x)) tmp_nme <- file.path(dir, paste0("part_", names(x)), From 99529cdec8dd16860007231c05a6f5c153bab645 Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Mon, 25 Mar 2024 17:31:32 +0100 Subject: [PATCH 34/38] Apply callback before split_write --- R/setup-import.R | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/R/setup-import.R b/R/setup-import.R index 3cfcc487..ab6388ec 100644 --- a/R/setup-import.R +++ b/R/setup-import.R @@ -226,12 +226,10 @@ merge_fst_chunks <- function(src, targ, new, old, sort_col, prog, nme, tick) { invisible(NULL) } -split_write <- function(x, part_fun, dir, chunk_no, prog, nme, tick, callback = NULL) { +split_write <- function(x, part_fun, dir, chunk_no, prog, nme, tick) { n_row <- nrow(x) - if (!is.null(callback)) { - x <- callback(x) - } + x <- split(x, part_fun(x)) tmp_nme <- file.path(dir, paste0("part_", names(x)), @@ -280,8 +278,8 @@ partition_table <- function(x, dir, progress = NULL, chunk_length = 10 ^ 7, temp process_chunk <- function(x, pos, ...) { report_problems(x, rawf) - split_write(x, pfun, tempdir, ((pos - 1L) / chunk_length) + 1L, - progress, name, tick, callback) + split_write(callback(x), pfun, tempdir, ((pos - 1L) / chunk_length) + 1L, + progress, name, tick) } if (grepl("\\.gz$", file)) { From f54afad4770f784aabb6fff4f5deba5e4eccc23c Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Tue, 26 Mar 2024 09:47:14 +0100 Subject: [PATCH 35/38] Config updates: - Fix sic bugs - Slack temp range --- inst/extdata/config/concept-dict/chemistry.json | 6 ++++-- inst/extdata/config/concept-dict/hematology.json | 8 +++++--- inst/extdata/config/concept-dict/vitals.json | 4 ++-- 3 files changed, 11 insertions(+), 7 deletions(-) diff --git a/inst/extdata/config/concept-dict/chemistry.json b/inst/extdata/config/concept-dict/chemistry.json index 39fbd784..fab37aee 100644 --- a/inst/extdata/config/concept-dict/chemistry.json +++ b/inst/extdata/config/concept-dict/chemistry.json @@ -876,7 +876,8 @@ "ids": 253, "table": "laboratory", "sub_var": "laboratoryid", - "class": "sic_itm" + "class": "sic_itm", + "callback": "convert_unit(identity_callback, 'ng/mL', 'µg/l')" } ], "picdb": [ @@ -1133,7 +1134,8 @@ "ids": 341, "table": "laboratory", "sub_var": "laboratoryid", - "class": "sic_itm" + "class": "sic_itm", + "callback": "convert_unit(binary_op(`*`, 10), 'mg/L', 'mg/dl')" } ], "picdb": [ diff --git a/inst/extdata/config/concept-dict/hematology.json b/inst/extdata/config/concept-dict/hematology.json index 3d9b5ef5..8aac6cf7 100644 --- a/inst/extdata/config/concept-dict/hematology.json +++ b/inst/extdata/config/concept-dict/hematology.json @@ -699,7 +699,8 @@ { "ids": 223, "table": "laboratory", - "sub_var": "LaboratoryID" + "sub_var": "laboratoryid", + "class": "sic_itm" }, { "ids": 302, @@ -854,7 +855,7 @@ "ids": 290, "table": "laboratory", "sub_var": "laboratoryid", - "callback": "convert_unit(binary_op(`*`, 0.16114), '%')", + "callback": "convert_unit(binary_op(`*`, 1.6114), '%', 'g/dl')", "class": "sic_itm" } ], @@ -1327,7 +1328,8 @@ "ids": 599, "table": "laboratory", "sub_var": "laboratoryid", - "class": "sic_itm" + "class": "sic_itm", + "callback": "convert_unit(identity_callback, 'm/uL', 'T/L.')" } ], "picdb": [ diff --git a/inst/extdata/config/concept-dict/vitals.json b/inst/extdata/config/concept-dict/vitals.json index 395fa144..75967e5e 100644 --- a/inst/extdata/config/concept-dict/vitals.json +++ b/inst/extdata/config/concept-dict/vitals.json @@ -476,8 +476,8 @@ "C", "\u00b0C" ], - "min": 32, - "max": 42, + "min": 30, + "max": 44, "description": "temperature", "omopid": 4302666, "category": "vitals", From ad6b07e6c7e8af96e41beff1bc0cf77c071fcf2f Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Tue, 26 Mar 2024 09:57:29 +0100 Subject: [PATCH 36/38] Fix configs --- inst/extdata/config/concept-dict/chemistry.json | 2 +- inst/extdata/config/concept-dict/hematology.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/extdata/config/concept-dict/chemistry.json b/inst/extdata/config/concept-dict/chemistry.json index fab37aee..712c7e36 100644 --- a/inst/extdata/config/concept-dict/chemistry.json +++ b/inst/extdata/config/concept-dict/chemistry.json @@ -1678,7 +1678,7 @@ "ids": 481, "table": "laboratory", "sub_var": "laboratoryid", - "callback": "transform_fun(binary_op(`/`, 1000))", + "callback": "transform_fun(binary_op(`/`, 1000), 'ng/mL', 'ng/L')", "class": "sic_itm" } ], diff --git a/inst/extdata/config/concept-dict/hematology.json b/inst/extdata/config/concept-dict/hematology.json index 8aac6cf7..735e071b 100644 --- a/inst/extdata/config/concept-dict/hematology.json +++ b/inst/extdata/config/concept-dict/hematology.json @@ -855,7 +855,7 @@ "ids": 290, "table": "laboratory", "sub_var": "laboratoryid", - "callback": "convert_unit(binary_op(`*`, 1.6114), '%', 'g/dl')", + "callback": "convert_unit(identity_callback, '%', 'g/dl')", "class": "sic_itm" } ], From 640d4d27812bee1238b8bfc1ca4775a0dbd47774 Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Tue, 26 Mar 2024 10:16:41 +0100 Subject: [PATCH 37/38] Fix callback --- inst/extdata/config/concept-dict/chemistry.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/extdata/config/concept-dict/chemistry.json b/inst/extdata/config/concept-dict/chemistry.json index 712c7e36..12e92d91 100644 --- a/inst/extdata/config/concept-dict/chemistry.json +++ b/inst/extdata/config/concept-dict/chemistry.json @@ -1678,7 +1678,7 @@ "ids": 481, "table": "laboratory", "sub_var": "laboratoryid", - "callback": "transform_fun(binary_op(`/`, 1000), 'ng/mL', 'ng/L')", + "callback": "convert_unit(binary_op(`/`, 1000), 'ng/mL', 'ng/L')", "class": "sic_itm" } ], From 9ebaf7f97e7974f0c76e118cb499cc4293ecbc16 Mon Sep 17 00:00:00 2001 From: Manuel Burger Date: Tue, 26 Mar 2024 14:10:57 +0100 Subject: [PATCH 38/38] Use `apply_map` for `sic` `sex` --- R/callback-itm.R | 10 ---------- inst/extdata/config/concept-dict/demographics.json | 2 +- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/R/callback-itm.R b/R/callback-itm.R index 22a971fc..941470b7 100644 --- a/R/callback-itm.R +++ b/R/callback-itm.R @@ -213,16 +213,6 @@ mimic_age <- function(x) { eicu_age <- function(x) as.numeric(ifelse(x == "> 89", 90, x)) -sic_sex <- function(x) { - ifelse( - x == 735, - "Male", - ifelse(x == 736, - "Female", - NA_character_ - )) -} - hirid_death <- function(x, val_var, sub_var, env, ...) { dis <- "discharge_status" diff --git a/inst/extdata/config/concept-dict/demographics.json b/inst/extdata/config/concept-dict/demographics.json index a0f7da0d..f4723d6d 100644 --- a/inst/extdata/config/concept-dict/demographics.json +++ b/inst/extdata/config/concept-dict/demographics.json @@ -320,7 +320,7 @@ "table": "cases", "val_var": "sex", "class": "col_itm", - "callback": "transform_fun(sic_sex)" + "callback": "apply_map(c(`735` = 'Male', `736` = 'Female'))" } ], "picdb": [