Skip to content

Commit

Permalink
landmark beyond function range
Browse files Browse the repository at this point in the history
  • Loading branch information
yezhengSTAT authored and yezhengSTAT committed May 14, 2024
1 parent f215c28 commit bbcee3f
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 26 deletions.
36 changes: 23 additions & 13 deletions R/ADTnorm.R
Original file line number Diff line number Diff line change
Expand Up @@ -118,12 +118,16 @@ ADTnorm = function(cell_x_adt = NULL, cell_x_feature = NULL, save_outpath = NULL
stop("Please provide the save_outpath to save the intermediate figures in pdf.")
}
if(!is.null(target_landmark_location)){ ## Check if user wants to align to a fixed location.
if(target_landmark_location == "fixed"){ ## Currently default fixed alignemnt locations are set to 1 and 5 for better visualization.
print("Will align negative peak to 1 and right-most positive peak to 5.")
target_landmark_location = c(1, 5)
if(length(target_landmark_location) == 1){ ## Currently default fixed alignemnt locations are set to 1 and 5 for better visualization.
if(target_landmark_location == "fixed"){
print("Will align negative peak to 1 and right-most positive peak to 5.")
target_landmark_location = c(1, 5)
}else{
stop("Please provide NULL, or fixed, or a two-element vector to target_landmark_location!")
}
}else{ ## If user provide the fixed alignment location, align to the user-set locations to align the negative and the right-most positive peak.
if(length(target_landmark_location) == 2 && target_landmark_location[1] < target_landmark_location[2]){
print(paste0("Will align negative peak to", target_landmark_location[1], " and right-most positive peak to ", target_landmark_location[2]))
print(paste0("Will align negative peak to ", target_landmark_location[1], " and right-most positive peak to ", target_landmark_location[2]))
}else{
stop("Please provide two elements vector to target_landmark_location where the first element is smaller!")
}
Expand All @@ -137,6 +141,9 @@ ADTnorm = function(cell_x_adt = NULL, cell_x_feature = NULL, save_outpath = NULL
## DATA PROCESSING AND CLEANING
## ==============================
## Remove ADT markers that have only zero values across all the cells
cell_x_adt = data.frame(cell_x_adt)
cell_x_feature = data.frame(cell_x_feature)

col_sums = colSums(cell_x_adt, na.rm = TRUE)
if (any(col_sums == 0)){
message("Markers with zero counts will be ignored")
Expand Down Expand Up @@ -265,9 +272,10 @@ ADTnorm = function(cell_x_adt = NULL, cell_x_feature = NULL, save_outpath = NULL
peak_replace_len = len_provided
provide_replace_len = len_provided
}else if(len_needed < len_provided){
print(paste0("Identified ", len_needed, " peaks but more peaks (", len_provided, ") are provided. Will only use the first ", len_needed, " peaks provided to override the peaks detected by ADTnorm."))
peak_replace_len = len_needed
provide_replace_len = len_needed
print(paste0("Identified ", len_needed, " peaks but more peaks (", len_provided, ") are provided. Will use ", len_provided, " peaks provided to override the peaks detected by ADTnorm."))
peak_replace_len = len_provided
provide_replace_len = len_provided
peak_mode_res = cbind(peak_mode_res, matrix(NA, nrow = nrow(peak_mode_res), ncol = len_provided - len_needed))
}else{
peak_replace_len = len_needed
provide_replace_len = len_provided
Expand All @@ -288,9 +296,10 @@ ADTnorm = function(cell_x_adt = NULL, cell_x_feature = NULL, save_outpath = NULL
valley_replace_len = len_provided
provide_replace_len = len_provided
}else if(len_needed < len_provided){
print(paste0("Identified ", len_needed, " valleys but more valleys (", len_provided, ") are provided. Will only use the first ", len_needed, " valleys provided to override the valleys detected by ADTnorm."))
valley_replace_len = len_needed
provide_replace_len = len_needed
print(paste0("Identified ", len_needed, " valleys but more valleys (", len_provided, ") are provided. Will use ", len_provided, " valleys provided to override the valleys detected by ADTnorm."))
valley_replace_len = len_provided
provide_replace_len = len_provided
valley_location_res = cbind(valley_location_res, matrix(NA, nrow = nrow(valley_location_res), ncol = len_provided - len_needed))
}else{
valley_replace_len = len_needed
provide_replace_len = len_provided
Expand All @@ -312,10 +321,11 @@ ADTnorm = function(cell_x_adt = NULL, cell_x_feature = NULL, save_outpath = NULL
landmark_pos[, seq(1, num_landmark, 2)] = peak_mode_res
landmark_pos[, seq(2, num_landmark, 2)] = valley_location_res
rownames(landmark_pos) = rownames(peak_mode_res)
colnames(landmark_pos) = paste0("col", 1:num_landmark)
colnames(landmark_pos)[seq(1, num_landmark, 2)] = paste0("peak", 1:ncol(peak_mode_res))
colnames(landmark_pos)[seq(2, num_landmark, 2)] = paste0("valley", 1:ncol(valley_location_res))

landmark_pos_customized = get_customize_landmark(cell_x_adt_sample, landmark_pos, bw = 0.2, adt_marker_select_name = adt_marker_select_name)
landmark_pos_customized = get_customize_landmark(cell_x_adt_sample, landmark_pos, bw = 0.1, adt_marker_select_name = adt_marker_select_name)
peak_mode_res = landmark_pos_customized[, seq(1, num_landmark, 2), drop = FALSE]
valley_location_res = landmark_pos_customized[, seq(2, num_landmark, 2), drop = FALSE]

Expand Down Expand Up @@ -397,9 +407,9 @@ ADTnorm = function(cell_x_adt = NULL, cell_x_feature = NULL, save_outpath = NULL
}else{
target_landmark = NULL
}
peak_alignment_res = peak_alignment(cell_x_adt[, adt_marker_select], cell_x_feature, landmark_matrix, target_landmark = target_landmark)
peak_alignment_res = peak_alignment(cell_x_adt[, adt_marker_select], cell_x_feature, landmark_matrix, target_landmark = target_landmark, neg_candidate_thres = neg_candidate_thres)
cell_x_adt_norm[, adt_marker_select] = peak_alignment_res[[1]]

if(ncol(peak_alignment_res[[2]]) == 2){
peak_mode_norm_res = peak_alignment_res[[2]][, 1, drop = FALSE]
valley_location_norm_res = peak_alignment_res[[2]][, 2, drop = FALSE]
Expand Down
6 changes: 3 additions & 3 deletions R/get_peak_midpoint.R
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,9 @@ get_peak_midpoint = function(cell_x_adt = NULL, cell_x_feature = NULL, adt_marke
# exprs(dat[[sample_name]])[, adt_marker_select] = adt_expression
}
}
fres1 = flowCore::filter(fcs, flowStats::curv1Filter(adt_marker_select, bwFac = 2))
fres2 = flowCore::filter(fcs, flowStats::curv1Filter(adt_marker_select, bwFac = 3))
fres3 = flowCore::filter(fcs, flowStats::curv1Filter(adt_marker_select, bwFac = 3.1))
fres1 = flowCore::filter(fcs, flowStats::curv1Filter(adt_marker_select, bwFac = max(2, bwFac_smallest)))
fres2 = flowCore::filter(fcs, flowStats::curv1Filter(adt_marker_select, bwFac = max(3, bwFac_smallest)))
fres3 = flowCore::filter(fcs, flowStats::curv1Filter(adt_marker_select, bwFac = max(3.1, bwFac_smallest)))

## different bandwidth w.r.t the zero proportion.
if (zero_prop > 0.5) {
Expand Down
8 changes: 4 additions & 4 deletions R/get_peak_mode.R
Original file line number Diff line number Diff line change
Expand Up @@ -73,15 +73,15 @@ get_peak_mode = function(cell_x_adt = NULL, cell_x_feature = NULL, adt_marker_se
adt_expression = cell_x_adt[cell_ind, adt_marker_select] ## adt value for this marker and this sample

## if most values are around 0 and there are very few unique values: add a small random number
if(zero_prop > 0.95){
if(zero_prop >= 0.95){
if(length(unique(adt_expression)) < 50){
adt_expression = adt_expression + stats::rnorm(length(adt_expression), mean = 0, sd = 0.05)
# exprs(dat[[sample_name]])[, adt_marker_select] = adt_expression
}
}
fres1 = flowCore::filter(fcs, flowStats::curv1Filter(adt_marker_select, bwFac = 2))
fres2 = flowCore::filter(fcs, flowStats::curv1Filter(adt_marker_select, bwFac = 3))
fres3 = flowCore::filter(fcs, flowStats::curv1Filter(adt_marker_select, bwFac = 3.1))
fres1 = flowCore::filter(fcs, flowStats::curv1Filter(adt_marker_select, bwFac = max(2, bwFac_smallest)))
fres2 = flowCore::filter(fcs, flowStats::curv1Filter(adt_marker_select, bwFac = max(3, bwFac_smallest)))
fres3 = flowCore::filter(fcs, flowStats::curv1Filter(adt_marker_select, bwFac = max(3.1, bwFac_smallest)))

## different bandwidth w.r.t the zero proportion.
if (zero_prop > 0.5) {
Expand Down
7 changes: 6 additions & 1 deletion R/get_valley_location.R
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,12 @@ get_valley_location = function(cell_x_adt = NULL, cell_x_feature = NULL, adt_mar
shoulder_cand_index = which(diff(y)/diff(x) > shoulder_valley_slope)
first_peak_index = (which(x > max(x_peak[1], real_peak[1])) %>% min) + 50
x_shoulder = x[shoulder_cand_index[shoulder_cand_index > first_peak_index][1]]
real_valley = min(x_shoulder, real_valley, na.rm = T)
if(is.na(x_shoulder) & is.na(real_valley)){
stop("No valley is detected. Please consider increasing 'valley_density_adjust'.")
}else{
real_valley = min(x_shoulder, real_valley, na.rm = T)
}


}else{
## check if no valley is detected due to shoulder peak
Expand Down
23 changes: 18 additions & 5 deletions R/peak_alignment.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@
# require(dplyr)
# require(flowStats)
# require(fda)
peak_alignment = function(cell_x_adt, cell_x_feature = NULL, landmark_matrix = NULL, target_landmark = NULL) {
peak_alignment = function(cell_x_adt, cell_x_feature = NULL, landmark_matrix = NULL, target_landmark = NULL, neg_candidate_thres = asinh(8/5 + 1)) {
## get parameters
grouping = NULL
monwrd = TRUE
subsample = NULL
peakNr = NULL
clipRange = 0.01
nbreaks = 11
nbreaks = 11 #11
bwFac = 2
warpFuns = FALSE
chunksinze = 10
Expand All @@ -34,14 +34,18 @@ peak_alignment = function(cell_x_adt, cell_x_feature = NULL, landmark_matrix = N

## set up fda parameters
extend = 0.15
from = min(cell_x_adt, na.rm = TRUE) - diff(range(cell_x_adt, na.rm = TRUE)) * extend
to = max(cell_x_adt, na.rm = TRUE) + diff(range(cell_x_adt, na.rm = TRUE)) * extend
from = min(c(min(cell_x_adt, na.rm = TRUE), target_landmark[1], min(landmark_matrix))) - diff(range(cell_x_adt, na.rm = TRUE)) * extend
to = max(c(max(cell_x_adt, na.rm = TRUE), target_landmark[length(target_landmark)], max(landmark_matrix))) + diff(range(cell_x_adt, na.rm = TRUE)) * extend

lower_bound = min(cell_x_adt, na.rm = TRUE) - diff(range(cell_x_adt, na.rm = TRUE)) * extend
upper_bound = max(cell_x_adt, na.rm = TRUE) + diff(range(cell_x_adt, na.rm = TRUE)) * extend

wbasis = fda::create.bspline.basis(
rangeval = c(from, to),
norder = 4, breaks = seq(from, to, len = nbreaks)
)
Wfd0 = fda::fd(matrix(0, wbasis$nbasis, 1), wbasis)
WfdPar = fda::fdPar(Wfd0, 1, 1e-4)
WfdPar = fda::fdPar(Wfd0, 1, 1e-5)


density_y = c()
Expand Down Expand Up @@ -77,6 +81,15 @@ peak_alignment = function(cell_x_adt, cell_x_feature = NULL, landmark_matrix = N
environment(funsBack[[samples[j]]]) = e2
}
} else { ## more than one landmark: warping
## if any valley is beyond the upper bound of the range, replace it with the maximum value of cell_x_adt
if(any(landmark_matrix[, 2] > upper_bound)){
landmark_matrix[which(landmark_matrix[, 2] > upper_bound), 2] = max(cell_x_adt, na.rm = TRUE)
print(paste0("Warning: some valley landmarks are larger the upper bound of the range. They are replaced by the maximum value of cell_x_adt. Please consider reduce 'neg_candidate_thres' value. The default value for 'neg_candidate_thres' is asinh(8/5 + 1) and the current value for 'neg_candidate_thres' is ", neg_candidate_thres, "."))
}
if(any(landmark_matrix[, 1] < lower_bound)){
landmark_matrix[which(landmark_matrix[, 1] < lower_bound), 1] = min(cell_x_adt, na.rm = TRUE)
print("Warning: some valley landmarks are smaller than the lower bound of the range. They are replaced by the minimum value of cell_x_adt.")
}
args = list("unregfd" = fdobj, "fdobj"=fdobj, "ximarks"=landmark_matrix, "WfdPar"=WfdPar, "monwrd"=monwrd)
if(!is.null(target_landmark)){
args[['x0marks']] = target_landmark
Expand Down

0 comments on commit bbcee3f

Please sign in to comment.