diff --git a/R/ADTnorm.R b/R/ADTnorm.R index e805e99..a7eb4cd 100644 --- a/R/ADTnorm.R +++ b/R/ADTnorm.R @@ -118,12 +118,16 @@ ADTnorm = function(cell_x_adt = NULL, cell_x_feature = NULL, save_outpath = NULL stop("Please provide the save_outpath to save the intermediate figures in pdf.") } if(!is.null(target_landmark_location)){ ## Check if user wants to align to a fixed location. - if(target_landmark_location == "fixed"){ ## Currently default fixed alignemnt locations are set to 1 and 5 for better visualization. - print("Will align negative peak to 1 and right-most positive peak to 5.") - target_landmark_location = c(1, 5) + if(length(target_landmark_location) == 1){ ## Currently default fixed alignemnt locations are set to 1 and 5 for better visualization. + if(target_landmark_location == "fixed"){ + print("Will align negative peak to 1 and right-most positive peak to 5.") + target_landmark_location = c(1, 5) + }else{ + stop("Please provide NULL, or fixed, or a two-element vector to target_landmark_location!") + } }else{ ## If user provide the fixed alignment location, align to the user-set locations to align the negative and the right-most positive peak. if(length(target_landmark_location) == 2 && target_landmark_location[1] < target_landmark_location[2]){ - print(paste0("Will align negative peak to", target_landmark_location[1], " and right-most positive peak to ", target_landmark_location[2])) + print(paste0("Will align negative peak to ", target_landmark_location[1], " and right-most positive peak to ", target_landmark_location[2])) }else{ stop("Please provide two elements vector to target_landmark_location where the first element is smaller!") } @@ -137,6 +141,9 @@ ADTnorm = function(cell_x_adt = NULL, cell_x_feature = NULL, save_outpath = NULL ## DATA PROCESSING AND CLEANNING ## ============================== ## Remove ADT marker if it only has zero value across all the cells + cell_x_adt = data.frame(cell_x_adt) + cell_x_feature = data.frame(cell_x_feature) + col_sums = colSums(cell_x_adt, na.rm = TRUE) if (any(col_sums == 0)){ message("Markers with zero counts will be ignored") @@ -265,9 +272,10 @@ ADTnorm = function(cell_x_adt = NULL, cell_x_feature = NULL, save_outpath = NULL peak_replace_len = len_provided provide_replace_len = len_provided }else if(len_needed < len_provided){ - print(paste0("Identified ", len_needed, " peaks but more peaks (", len_provided, ") are provided. Will only use the first ", len_needed, " peaks provided to override the peaks detected by ADTnorm.")) - peak_replace_len = len_needed - provide_replace_len = len_needed + print(paste0("Identified ", len_needed, " peaks but more peaks (", len_provided, ") are provided. Will use ", len_provided, " peaks provided to override the peaks detected by ADTnorm.")) + peak_replace_len = len_provided + provide_replace_len = len_provided + peak_mode_res = cbind(peak_mode_res, matrix(NA, nrow = nrow(peak_mode_res), ncol = len_provided - len_needed)) }else{ peak_replace_len = len_needed provide_replace_len = len_provided @@ -288,9 +296,10 @@ ADTnorm = function(cell_x_adt = NULL, cell_x_feature = NULL, save_outpath = NULL valley_replace_len = len_provided provide_replace_len = len_provided }else if(len_needed < len_provided){ - print(paste0("Identified ", len_needed, " valleys but more valleys (", len_provided, ") are provided. Will only use the first ", len_needed, " valleys provided to override the valleys detected by ADTnorm.")) - valley_replace_len = len_needed - provide_replace_len = len_needed + print(paste0("Identified ", len_needed, " valleys but more valleys (", len_provided, ") are provided. Will use ", len_provided, " valleys provided to override the valleys detected by ADTnorm.")) + valley_replace_len = len_provided + provide_replace_len = len_provided + valley_location_res = cbind(valley_location_res, matrix(NA, nrow = nrow(valley_location_res), ncol = len_provided - len_needed)) }else{ valley_replace_len = len_needed provide_replace_len = len_provided @@ -312,10 +321,11 @@ ADTnorm = function(cell_x_adt = NULL, cell_x_feature = NULL, save_outpath = NULL landmark_pos[, seq(1, num_landmark, 2)] = peak_mode_res landmark_pos[, seq(2, num_landmark, 2)] = valley_location_res rownames(landmark_pos) = rownames(peak_mode_res) + colnames(landmark_pos) = paste0("col", 1:num_landmark) colnames(landmark_pos)[seq(1, num_landmark, 2)] = paste0("peak", 1:ncol(peak_mode_res)) colnames(landmark_pos)[seq(2, num_landmark, 2)] = paste0("valley", 1:ncol(valley_location_res)) - landmark_pos_customized = get_customize_landmark(cell_x_adt_sample, landmark_pos, bw = 0.2, adt_marker_select_name = adt_marker_select_name) + landmark_pos_customized = get_customize_landmark(cell_x_adt_sample, landmark_pos, bw = 0.1, adt_marker_select_name = adt_marker_select_name) peak_mode_res = landmark_pos_customized[, seq(1, num_landmark, 2), drop = FALSE] valley_location_res = landmark_pos_customized[, seq(2, num_landmark, 2), drop = FALSE] @@ -397,9 +407,9 @@ ADTnorm = function(cell_x_adt = NULL, cell_x_feature = NULL, save_outpath = NULL }else{ target_landmark = NULL } - peak_alignment_res = peak_alignment(cell_x_adt[, adt_marker_select], cell_x_feature, landmark_matrix, target_landmark = target_landmark) + peak_alignment_res = peak_alignment(cell_x_adt[, adt_marker_select], cell_x_feature, landmark_matrix, target_landmark = target_landmark, neg_candidate_thres = neg_candidate_thres) cell_x_adt_norm[, adt_marker_select] = peak_alignment_res[[1]] - + if(ncol(peak_alignment_res[[2]]) == 2){ peak_mode_norm_res = peak_alignment_res[[2]][, 1, drop = FALSE] valley_location_norm_res = peak_alignment_res[[2]][, 2, drop = FALSE] diff --git a/R/get_peak_midpoint.R b/R/get_peak_midpoint.R index 5197c52..21e3429 100644 --- a/R/get_peak_midpoint.R +++ b/R/get_peak_midpoint.R @@ -84,9 +84,9 @@ get_peak_midpoint = function(cell_x_adt = NULL, cell_x_feature = NULL, adt_marke # exprs(dat[[sample_name]])[, adt_marker_select] = adt_expression } } - fres1 = flowCore::filter(fcs, flowStats::curv1Filter(adt_marker_select, bwFac = 2)) - fres2 = flowCore::filter(fcs, flowStats::curv1Filter(adt_marker_select, bwFac = 3)) - fres3 = flowCore::filter(fcs, flowStats::curv1Filter(adt_marker_select, bwFac = 3.1)) + fres1 = flowCore::filter(fcs, flowStats::curv1Filter(adt_marker_select, bwFac = max(2, bwFac_smallest))) + fres2 = flowCore::filter(fcs, flowStats::curv1Filter(adt_marker_select, bwFac = max(3, bwFac_smallest))) + fres3 = flowCore::filter(fcs, flowStats::curv1Filter(adt_marker_select, bwFac = max(3.1, bwFac_smallest))) ## different bandwidth w.r.t the zero proportion. if (zero_prop > 0.5) { diff --git a/R/get_peak_mode.R b/R/get_peak_mode.R index 3de2a58..d51a6b3 100644 --- a/R/get_peak_mode.R +++ b/R/get_peak_mode.R @@ -73,15 +73,15 @@ get_peak_mode = function(cell_x_adt = NULL, cell_x_feature = NULL, adt_marker_se adt_expression = cell_x_adt[cell_ind, adt_marker_select] ## adt value for this marker and this sample ## if most are around 0 and there are very few unique value: add random small number - if(zero_prop > 0.95){ + if(zero_prop >= 0.95){ if(length(unique(adt_expression)) < 50){ adt_expression = adt_expression + stats::rnorm(length(adt_expression), mean = 0, sd = 0.05) # exprs(dat[[sample_name]])[, adt_marker_select] = adt_expression } } - fres1 = flowCore::filter(fcs, flowStats::curv1Filter(adt_marker_select, bwFac = 2)) - fres2 = flowCore::filter(fcs, flowStats::curv1Filter(adt_marker_select, bwFac = 3)) - fres3 = flowCore::filter(fcs, flowStats::curv1Filter(adt_marker_select, bwFac = 3.1)) + fres1 = flowCore::filter(fcs, flowStats::curv1Filter(adt_marker_select, bwFac = max(2, bwFac_smallest))) + fres2 = flowCore::filter(fcs, flowStats::curv1Filter(adt_marker_select, bwFac = max(3, bwFac_smallest))) + fres3 = flowCore::filter(fcs, flowStats::curv1Filter(adt_marker_select, bwFac = max(3.1, bwFac_smallest))) ## different bandwidth w.r.t the zero proportion. if (zero_prop > 0.5) { diff --git a/R/get_valley_location.R b/R/get_valley_location.R index 362c07a..7accfd6 100644 --- a/R/get_valley_location.R +++ b/R/get_valley_location.R @@ -131,7 +131,12 @@ get_valley_location = function(cell_x_adt = NULL, cell_x_feature = NULL, adt_mar shoulder_cand_index = which(diff(y)/diff(x) > shoulder_valley_slope) first_peak_index = (which(x > max(x_peak[1], real_peak[1])) %>% min) + 50 x_shoulder = x[shoulder_cand_index[shoulder_cand_index > first_peak_index][1]] - real_valley = min(x_shoulder, real_valley, na.rm = T) + if(is.na(x_shoulder) & is.na(real_valley)){ + stop("No valley is detected. Please consider increasing 'valley_density_adjust'.") + }else{ + real_valley = min(x_shoulder, real_valley, na.rm = T) + } + }else{ ## check if no valley is detected due to shoulder peak diff --git a/R/peak_alignment.R b/R/peak_alignment.R index 40a6e8b..41c9017 100644 --- a/R/peak_alignment.R +++ b/R/peak_alignment.R @@ -13,14 +13,14 @@ # require(dplyr) # require(flowStats) # require(fda) -peak_alignment = function(cell_x_adt, cell_x_feature = NULL, landmark_matrix = NULL, target_landmark = NULL) { +peak_alignment = function(cell_x_adt, cell_x_feature = NULL, landmark_matrix = NULL, target_landmark = NULL, neg_candidate_thres = asinh(8/5 + 1)) { ## get parameters grouping = NULL monwrd = TRUE subsample = NULL peakNr = NULL clipRange = 0.01 - nbreaks = 11 + nbreaks = 11 #11 bwFac = 2 warpFuns = FALSE chunksinze = 10 @@ -34,14 +34,18 @@ peak_alignment = function(cell_x_adt, cell_x_feature = NULL, landmark_matrix = N ## set up fda parameters extend = 0.15 - from = min(cell_x_adt, na.rm = TRUE) - diff(range(cell_x_adt, na.rm = TRUE)) * extend - to = max(cell_x_adt, na.rm = TRUE) + diff(range(cell_x_adt, na.rm = TRUE)) * extend + from = min(c(min(cell_x_adt, na.rm = TRUE), target_landmark[1], min(landmark_matrix))) - diff(range(cell_x_adt, na.rm = TRUE)) * extend + to = max(c(max(cell_x_adt, na.rm = TRUE), target_landmark[length(target_landmark)], max(landmark_matrix))) + diff(range(cell_x_adt, na.rm = TRUE)) * extend + + lower_bound = min(cell_x_adt, na.rm = TRUE) - diff(range(cell_x_adt, na.rm = TRUE)) * extend + upper_bound = max(cell_x_adt, na.rm = TRUE) + diff(range(cell_x_adt, na.rm = TRUE)) * extend + wbasis = fda::create.bspline.basis( rangeval = c(from, to), norder = 4, breaks = seq(from, to, len = nbreaks) ) Wfd0 = fda::fd(matrix(0, wbasis$nbasis, 1), wbasis) - WfdPar = fda::fdPar(Wfd0, 1, 1e-4) + WfdPar = fda::fdPar(Wfd0, 1, 1e-5) density_y = c() @@ -77,6 +81,15 @@ peak_alignment = function(cell_x_adt, cell_x_feature = NULL, landmark_matrix = N environment(funsBack[[samples[j]]]) = e2 } } else { ## more than one landmark: warping + ## if any valley is beyond the upper bound of range, replace by the upper bound + if(any(landmark_matrix[, 2] > upper_bound)){ + landmark_matrix[which(landmark_matrix[, 2] > upper_bound), 2] = max(cell_x_adt, na.rm = TRUE) + print(paste0("Warning: some valley landmarks are larger the upper bound of the range. They are replaced by the maximum value of cell_x_adt. Please consider reduce 'neg_candidate_thres' value. The default value for 'neg_candidate_thres' is asinh(8/5 + 1) and the current value for 'neg_candidate_thres' is ", neg_candidate_thres, ".")) + } + if(any(landmark_matrix[, 1] < lower_bound)){ + landmark_matrix[which(landmark_matrix[, 1] < lower_bound), 1] = min(cell_x_adt, na.rm = TRUE) + print("Warning: some valley landmarks are smaller than the lower bound of the range. They are replaced by the minimum value of cell_x_adt.") + } args = list("unregfd" = fdobj, "fdobj"=fdobj, "ximarks"=landmark_matrix, "WfdPar"=WfdPar, "monwrd"=monwrd) if(!is.null(target_landmark)){ args[['x0marks']] = target_landmark