Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added vdiffr into testing, necessary files for Rcpp, seurat command, and minor syntax changes #256

Merged
merged 11 commits into from
Aug 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,7 @@ jobs:
config:
- {os: macos-latest, r: 'release'}
- {os: windows-latest, r: 'release'}
- {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
- {os: ubuntu-latest, r: 'release'}
- {os: ubuntu-latest, r: 'oldrel-1'}

env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
.Rhistory
.Rhistory
local_tests.R
.RData
10 changes: 7 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ RoxygenNote: 7.2.3
biocViews: Software, ImmunoOncology, SingleCell, Classification, Annotation, Sequencing
Depends:
ggplot2,
R (>= 4.0)
R (>= 4.0),
Seurat
Imports:
stringdist,
dplyr,
Expand All @@ -33,16 +34,19 @@ Imports:
tidygraph,
SeuratObject,
stats,
Seurat
Rcpp
Suggests:
knitr,
rmarkdown,
BiocStyle,
circlize,
scales,
scater,
spelling,
testthat (>= 3.0.0),
spelling
vdiffr
VignetteBuilder: knitr
Config/testthat/edition: 3
Language: en-US
LinkingTo:
Rcpp
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ export(vizGenes)
exportClasses(StartracOut)
import(dplyr)
import(ggplot2)
importFrom(Rcpp,sourceCpp)
importFrom(SeuratObject,Embeddings)
importFrom(SeuratObject,Idents)
importFrom(SingleCellExperiment,colData)
Expand Down Expand Up @@ -97,3 +98,4 @@ importFrom(utils,combn)
importFrom(utils,head)
importFrom(vegan,diversity)
importFrom(vegan,estimateR)
useDynLib(scRepertoire, .registration = TRUE)
7 changes: 7 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

rcpp_hello_world <- function() {
    # Invoke the registered native routine, then hand its result back
    # invisibly so nothing is auto-printed at the console.
    native_result <- .Call(`_scRepertoire_rcpp_hello_world`)
    invisible(native_result)
}

89 changes: 49 additions & 40 deletions R/combineContigs.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,20 +13,25 @@ data1_lines <- c("TCR1", "cdr3", "cdr3_nt")
data2_lines <- c("TCR2", "cdr3", "cdr3_nt")
CT_lines <- c("CTgene", "CTnt", "CTaa", "CTstrict")

utils::globalVariables(c("heavy_lines", "light_lines", "l_lines", "k_lines",
"h_lines", "tcr1_lines", "tcr2_lines", "data1_lines",
"data2_lines", "CT_lines"))
utils::globalVariables(c(
"heavy_lines", "light_lines", "l_lines", "k_lines", "h_lines", "tcr1_lines",
"tcr2_lines", "data1_lines", "data2_lines", "CT_lines"
))

#' Combining the list of T Cell Receptor contigs
#' @title Combining the list of T Cell Receptor contigs
#'
#' This function consolidates a list of TCR sequencing results to the level of
#' the individual cell barcodes. Using the samples and ID parameters, the
#' function will add the strings as prefixes to prevent issues with repeated
#' barcodes. The resulting new barcodes will need to match the Seurat or SCE
#' object in order to use, \code{\link{combineExpression}}. Several
#' levels of filtering exist - remove or filterMulti are parameters that
#' control how the function deals with barcodes with multiple chains
#' recovered.
#' @description This function consolidates a list of TCR sequencing results to
#' the level of the individual cell barcodes. Using the samples and ID
#' parameters, the function will add the strings as prefixes to prevent issues
#' with repeated barcodes. The resulting new barcodes will need to match the
#' Seurat or SCE object in order to use, \code{\link{combineExpression}}.
#' Several levels of filtering exist - `removeNA`, `removeMulti`, or
#' `filterMulti` are parameters that control how the function deals with
#' barcodes with multiple chains recovered.
#'
#' @details For single-sample TCR sequencing experiments, where the input is
#' just a single data.frame, the function will add the consolidated information
#' columns after the existing raw clonotype data
#'
#' @examples
#' combineTCR(contig_list,
Expand All @@ -44,6 +49,7 @@ utils::globalVariables(c("heavy_lines", "light_lines", "l_lines", "k_lines",
#' @import dplyr
#' @export
#' @return List of clonotypes for individual cell barcodes
#'
combineTCR <- function(df,
samples = NULL,
ID = NULL,
Expand Down Expand Up @@ -83,10 +89,10 @@ combineTCR <- function(df,
} else {
out <- df
}
for (i in seq_along(out)) {
for (i in seq_along(out)) { # ideally the nested code could be in a function for a better development/testing experience
data2 <- out[[i]]
data2 <- makeGenes(cellType = "T", data2)
unique_df <- unique(data2$barcode)
unique_df <- unique(data2$barcode) # could potentially display % here
Con.df <- data.frame(matrix(NA, length(unique_df), 7))
colnames(Con.df) <- c("barcode",tcr1_lines, tcr2_lines)
Con.df$barcode <- unique_df
Expand All @@ -104,22 +110,24 @@ combineTCR <- function(df,
}
final[[i]] <- data3
}
names <- NULL
name_vector <- character(length(samples))
for (i in seq_along(samples)) {
if (!is.null(samples) & !is.null(ID)) {
c <- paste(samples[i], "_", ID[i], sep="")
} else if (!is.null(samples) & is.null(ID)) {
c <- paste(samples[i], sep="")
}
names <- c(names, c)
if (!is.null(samples) & !is.null(ID)) {
curr <- paste(samples[i], "_", ID[i], sep="")
} else if (!is.null(samples) & is.null(ID)) {
curr <- paste(samples[i], sep="")
}
name_vector[i] <- curr
}
names(final) <- names
names(final) <- name_vector
for (i in seq_along(final)){
final[[i]]<-final[[i]][!duplicated(final[[i]]$barcode),]
final[[i]]<-final[[i]][rowSums(is.na(final[[i]])) < 10, ]}
if (removeNA == TRUE) { final <- removingNA(final)}
if (removeMulti == TRUE) { final <- removingMulti(final) }
return(final) }
final[[i]]<-final[[i]][!duplicated(final[[i]]$barcode),]
final[[i]]<-final[[i]][rowSums(is.na(final[[i]])) < 10, ]
}
if (removeNA) { final <- removingNA(final)}
if (removeMulti) { final <- removingMulti(final) }
final
}

#' Combining the list of B Cell Receptor contigs
#'
Expand All @@ -133,8 +141,8 @@ combineTCR <- function(df,
#' and the corresponding v-gene. This index automatically calculates
#' the Levenshtein distance between sequences with the same V gene and will
#' index sequences with <= 0.15 normalized Levenshtein distance with the same
#' ID. After which, clonotype clusters are called using the igraph
#' component() function. Clonotype that are clustered across multiple
#' ID. After which, clonotype clusters are called using the
#' `igraph::components()` function. Clonotypes that are clustered across multiple
#' sequences will then be labeled with "LD" in the CTstrict header.
#'
#' @examples
Expand Down Expand Up @@ -188,9 +196,9 @@ combineBCR <- function(df,
}
}
if (!is.null(samples)) {
out <- modifyBarcodes(df, samples, ID)
out <- modifyBarcodes(df, samples, ID)
} else {
out <- df
out <- df
}
for (i in seq_along(out)) {
data2 <- data.frame(out[[i]])
Expand Down Expand Up @@ -226,26 +234,27 @@ combineBCR <- function(df,
if (!is.null(sample) & !is.null(ID)) {
final[[i]]<- final[[i]][, c("barcode", "sample", "ID",
heavy_lines[c(1,2,3)], light_lines[c(1,2,3)], CT_lines)]
}
}
else if (!is.null(sample) & is.null(ID)) {
final[[i]]<- final[[i]][, c("barcode", "sample",
heavy_lines[c(1,2,3)], light_lines[c(1,2,3)], CT_lines)]
}
}
names <- NULL
for (i in seq_along(samples)) {
if (!is.null(samples) & !is.null(ID)) {
c <- paste(samples[i], "_", ID[i], sep="")
} else if (!is.null(samples) & is.null(ID)) {
c <- paste(samples[i], sep="")
}
names <- c(names, c)}
if (!is.null(samples) & !is.null(ID)) {
c <- paste(samples[i], "_", ID[i], sep="")
} else if (!is.null(samples) & is.null(ID)) {
c <- paste(samples[i], sep="")
}
names <- c(names, c)
}
names(final) <- names
for (i in seq_along(final)) {
final[[i]] <- final[[i]][!duplicated(final[[i]]$barcode),]
final[[i]]<-final[[i]][rowSums(is.na(final[[i]])) < 10, ]}
if (removeNA == TRUE) { final <- removingNA(final) }
if (removeMulti == TRUE) { final <- removingMulti(final) }
if (removeNA) { final <- removingNA(final) }
if (removeMulti) { final <- removingMulti(final) }
return(final)
}

Expand Down
6 changes: 0 additions & 6 deletions R/contig_list.R

This file was deleted.

64 changes: 64 additions & 0 deletions R/data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#' A data set of T cell contigs as a list outputted from the
#' `filtered_contig_annotation` files.
#' @docType data
#' @name contig_list
#'
NULL

#' A Seurat object of 100 single T cells derived
#' from 3 clear cell renal carcinoma patients.
#'
#' @description The object is compatible with `contig_list` and the TCR
#' sequencing data can be added with `combineExpression`.
#'
#' @name screp_example
#' @docType data
#'
NULL

#' Processed subset of `contig_list`
#'
#' @description A list of 6 dataframes of T cell contigs outputted from the
#' `filtered_contig_annotation` files, but subsetted to about 92 valid T cells
#' which correspond to the same barcodes found in `screp_example`
#'
#' @usage data("combined_mini_contig_list")
#'
#' @format An R `list` of `data.frame` objects
#'
#' @docType data
#'
#' @seealso \code{\link{contig_list}}
#'
"combined_mini_contig_list"

# # Code used for creating the combined_mini_contig_list:

# Requires the hash and usethis packages (both are called with `::` below).
#
#data("contig_list", "screp_example")

#combined_mini_contig_list <- combineTCR(
# contig_list,
# samples = c("PY", "PY", "PX", "PX", "PZ","PZ"),
# ID = c("P", "T", "P", "T", "P", "T")
#)
#all_barcodes <- names(screp_example@active.ident)
#barcode_set <- hash::hash(all_barcodes, all_barcodes) # a worse version of a set
#col_names <- names(combined_mini_contig_list[[1]])

#for (i in seq_along(combined_mini_contig_list)) {
# curr_df <- setNames(
# data.frame(replicate(length(col_names), character(0))), col_names
# )
# len <- 0
# for (j in seq_along(combined_mini_contig_list[[i]][[1]])) {
# if (is.null(barcode_set[[combined_mini_contig_list[[i]][[1]][[j]]]])) {
# next
# }
# len <- len + 1
# curr_df[len, ] <- combined_mini_contig_list[[i]][j, ]
# }
# combined_mini_contig_list[[i]] <- curr_df
#}
#usethis::use_data(combined_mini_contig_list)
16 changes: 10 additions & 6 deletions R/processing.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,16 @@
#' stripBarcode(contig_list[[1]], column = 1, connector = "_", num_connects = 1)
#' @export
#' @return list with the suffixes of the barcodes removed.
stripBarcode <- function(contigs, column = 1, connector = "_",
                         num_connects = 3) {
  # Split every barcode on the connector. The pattern is a regex character
  # class, so each single character of `connector` (and a literal single
  # quote) acts as a separator -- unchanged from the previous implementation.
  pattern <- paste0("['", connector, "']")
  pieces <- strsplit(contigs[, column], pattern)

  # Extract the requested piece barcode by barcode. The previous version
  # forced the split result into a rectangular data.frame, which errored as
  # soon as two barcodes contained a different number of pieces.
  too_short <- lengths(pieces) < num_connects
  if (any(too_short)) {
    warning(sum(too_short), " barcode(s) have fewer than ", num_connects,
            " pieces; returning NA for them.", call. = FALSE)
  }

  # Overwrite the barcode column in place; all other columns are untouched.
  contigs[, column] <- vapply(
    pieces,
    function(parts) parts[num_connects],
    character(1),
    USE.NAMES = FALSE
  )
  contigs
}
Expand Down
8 changes: 8 additions & 0 deletions R/scRepertoire-package.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#' @keywords internal
"_PACKAGE"

## usethis namespace: start
#' @importFrom Rcpp sourceCpp
#' @useDynLib scRepertoire, .registration = TRUE
## usethis namespace: end
NULL
7 changes: 0 additions & 7 deletions R/screp_example.R

This file was deleted.

Loading
Loading