Skip to content

Commit

Permalink
completely fixed constructConDfAndParseTCR
Browse files Browse the repository at this point in the history
  • Loading branch information
Qile0317 committed Nov 9, 2023
1 parent dc1d7c7 commit 0121717
Show file tree
Hide file tree
Showing 6 changed files with 25 additions and 15 deletions.
7 changes: 3 additions & 4 deletions R/combineContigs.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,20 +82,19 @@ combineTCR <- function(input.data,
}
}
if (!is.null(samples)) {
out <- .modifyBarcodes(input.data, samples, ID)
out <- .modifyBarcodes(input.data, samples, ID)
} else {
out <- input.data
}
for (i in seq_along(out)) {
data2 <- out[[i]]
data2 <- .makeGenes(cellType = "T", data2)
data2 <- .makeGenes(cellType = "T", out[[i]])
Con.df <- .constructConDfAndParseTCR(data2)
Con.df <- .assignCT(cellType = "T", Con.df)
Con.df[Con.df == "NA_NA" | Con.df == "NA;NA_NA;NA"] <- NA
data3 <- merge(data2[,-which(names(data2) %in% c("TCR1","TCR2"))],
Con.df, by = "barcode")
if (!is.null(samples) & !is.null(ID)) {
data3<-data3[, c("barcode","sample","ID",tcr1_lines,tcr2_lines,
data3 <- data3[, c("barcode", "sample", "ID", tcr1_lines, tcr2_lines,
CT_lines)] }
else if (!is.null(samples) & is.null(ID)) {
data3<-data3[,c("barcode","sample",tcr1_lines,tcr2_lines,
Expand Down
5 changes: 2 additions & 3 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ is_seurat_or_se_object <- function(obj) {
# Thin R wrapper around the Rcpp routine `rcppConstructConDfAndParseTCR()`.
#
# @param data2 A data frame of contigs. It must contain the columns `chain`
#   and `cdr3_nt` (used for sorting); its first column is used as the
#   barcode column.
# @return Whatever `rcppConstructConDfAndParseTCR()` returns for the sorted
#   contigs and the vector of unique barcodes.
.constructConDfAndParseTCR <- function(data2) {
  rcppConstructConDfAndParseTCR(
    # rows are ordered by chain, then by CDR3 nucleotide sequence
    data2 %>% arrange(., chain, cdr3_nt),
    # barcodes are taken positionally from the first column, which is the
    # same column the C++ side indexes (`data2[0]`) to build its barcode index
    unique(data2[[1]])
  )
}

Expand Down Expand Up @@ -400,8 +400,7 @@ is_seurat_or_se_object <- function(obj) {
str_c(str_replace_na(v_gene), str_replace_na(j_gene), str_replace_na(c_gene), sep = "."), NA)) %>%
mutate(TCR2 = ifelse(chain %in% c("TRB", "TRD"),
str_c(str_replace_na(v_gene), str_replace_na(d_gene), str_replace_na(j_gene), str_replace_na(c_gene), sep = "."), NA))
}
else { # assume BCR (`c("B")`)
} else if (cellType %in% c("B")) {
heavy <- data2[data2$chain == "IGH",] %>%
mutate(IGHct = str_c(str_replace_na(v_gene), str_replace_na(d_gene), str_replace_na(j_gene), str_replace_na(c_gene), sep = "."))
kappa <- data2[data2$chain == "IGK",] %>%
Expand Down
15 changes: 10 additions & 5 deletions src/constructConDfAndparseTCR.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ std::vector<std::vector<int>> constructBarcodeIndex(

class TcrParser {
public:
// variable for the eventual output conDf
// variable for the eventual output Con.df
std::vector<std::vector<std::string>> conDf;

// variables for *references* to columns on data2
Expand All @@ -39,6 +39,8 @@ class TcrParser {
// variable for helper barcode index
std::vector<std::vector<int>> barcodeIndex;

// constructor: if more columns are ever added in the future, a more general
// approach would be to look up the dataframe's column indices by their colnames
TcrParser(
Rcpp::DataFrame& data2, std::vector<std::string>& uniqueData2Barcodes
) {
Expand All @@ -48,19 +50,22 @@ class TcrParser {
);
conDf[0] = uniqueData2Barcodes;

// set references to data2 columns
// set references to fixed data2 columns
data2ChainTypes = data2[5];
data2Tcr1 = data2[19];
data2Tcr2 = data2[20];
data2Cdr3 = data2[12];
data2Cdr3Nt = data2[13];

// barcodeIndex
// set references to the TCR columns, assuming they are the last two columns of data2 (any extra columns come before them)
data2Tcr1 = data2[data2.size() - 2];
data2Tcr2 = data2[data2.size() - 1];

// construct barcodeIndex
barcodeIndex = constructBarcodeIndex(
uniqueData2Barcodes, Rcpp::as<std::vector<std::string>>(data2[0])
);
}

// Rcpp implementation of .parseTCR()
void parseTCR() {
for (int y = 0; y < (int) conDf[0].size(); y++) {
for (int index : barcodeIndex[y]) {
Expand Down
13 changes: 10 additions & 3 deletions tests/testthat/test-utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,17 @@ test_that(".theCall works", {
})
#TODO .theCall Add custom header

# TODO .constructConDfAndParseTCR !!!! Need to use testing data from the old version

test_that(".constructConDfAndParseTCR works", {
  # Fixture-based check: run the function on the stored input and compare
  # the result against the stored expected output.
  input_data <- getdata("utils", "constructConDfAndParseTCRInput")
  expected_con_df <- getdata("utils", "expected_con_df")

  expect_identical(.constructConDfAndParseTCR(input_data), expected_con_df)

  # TODO: add more cases (including edge cases) - this is not comprehensive.
  # In the past there was a case where this test passed even though the
  # function caused a segmentation fault.
})

# TODO .parseBCR
Expand Down
Binary file not shown.
Binary file not shown.

0 comments on commit 0121717

Please sign in to comment.