diff --git a/DESCRIPTION b/DESCRIPTION index ad4d1b2..1a84a1d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: crisprDesign Title: Comprehensive design of CRISPR gRNAs for nucleases and base editors -Version: 1.3.2 +Version: 1.3.3 Authors@R: c( person("Jean-Philippe", "Fortin", email = "fortin946@gmail.com", role = c("aut", "cre")), person("Luke", "Hoberecht", email = "lukehob3@gmail.com", role = c("aut")) diff --git a/NAMESPACE b/NAMESPACE index 068598f..f26cacd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -44,6 +44,7 @@ export(designCompleteAnnotation) export(designOpsLibrary) export(editedAlleles) export(enzymeAnnotation) +export(exonTable) export(findSpacerPairs) export(findSpacers) export(flattenGuideSet) @@ -78,6 +79,7 @@ export(spacerDistance) export(spacers) export(targetOrigin) export(tssAnnotation) +export(txTable) export(updateOpsLibrary) export(validateOpsLibrary) exportClasses(GuideSet) @@ -115,6 +117,7 @@ exportMethods(cutLength) exportMethods(cutSites) exportMethods(editedAlleles) exportMethods(enzymeAnnotation) +exportMethods(exonTable) exportMethods(geneAnnotation) exportMethods(offTargets) exportMethods(onTargets) @@ -133,6 +136,7 @@ exportMethods(spacerLength) exportMethods(spacers) exportMethods(targetOrigin) exportMethods(tssAnnotation) +exportMethods(txTable) importClassesFrom(Biostrings,DNAStringSet) importClassesFrom(GenomeInfoDb,Seqinfo) importClassesFrom(GenomicRanges,GPos) diff --git a/R/AllGenerics.R b/R/AllGenerics.R index 30fdee4..fff6c8b 100644 --- a/R/AllGenerics.R +++ b/R/AllGenerics.R @@ -92,6 +92,18 @@ setGeneric("editedAlleles", function(object, ...) standardGeneric("editedAlleles")) +#' @rdname GuideSet-class +#' @export +setGeneric("txTable", + function(object, ...) standardGeneric("txTable")) + + +#' @rdname GuideSet-class +#' @export +setGeneric("exonTable", + function(object, ...) 
standardGeneric("exonTable")) + + #' @rdname PairedGuideSet-class #' @export setGeneric("pamOrientation", diff --git a/R/GuideSet-class.R b/R/GuideSet-class.R index ba380e4..24b92dc 100644 --- a/R/GuideSet-class.R +++ b/R/GuideSet-class.R @@ -106,7 +106,7 @@ GuideSet <- function(ids = NA_character_, targetOrigin <- match.arg(targetOrigin) protospacers <- .validateGuideSetSequences("protospacers", protospacers) pams <- .validateGuideSetSequences("pams", pams) - + # Checking ids if (sum(duplicated(ids))>0){ stop("Duplicated values for 'ids' are not allowed.") @@ -114,8 +114,8 @@ GuideSet <- function(ids = NA_character_, if (length(ids)!=length(protospacers)){ stop("'ids' must have the same length as 'protospacers'.") } - - + + gr <- GRanges(seqnames, IRanges(start=pam_site, width=1), @@ -123,7 +123,7 @@ GuideSet <- function(ids = NA_character_, ..., seqinfo=seqinfo, seqlengths=seqlengths) - + # Adding global metadata: metadata(gr)[["CrisprNuclease"]] <- CrisprNuclease metadata(gr)[["targetOrigin"]] <- targetOrigin @@ -138,7 +138,7 @@ GuideSet <- function(ids = NA_character_, .isDNAStringSet(customSequences) metadata(gr)[["customSequences"]] <- customSequences } - + # Adding metadata columns: mcols(gr)[["protospacer"]] <- DNAStringSet(protospacers) if (!is.null(pams)){ @@ -163,24 +163,24 @@ GuideSet <- function(ids = NA_character_, setMethod("show", signature(object = "GuideSet"), function(object){ - callNextMethod() - name <- nucleaseName(metadata(object)$CrisprNuclease) - cat(paste0(" crisprNuclease: ", name, "\n")) -}) + callNextMethod() + name <- nucleaseName(metadata(object)$CrisprNuclease) + cat(paste0(" crisprNuclease: ", name, "\n")) + }) setValidity("GuideSet", function(object){ - + df <- mcols(object) mandatoryCols <- c("protospacer", "pam_site","pam") out <- TRUE if (!all(mandatoryCols %in% colnames(df))){ out <- paste0("The following columns must be present", - " in mcols(object): protospacer, pam_site and pam.") + " in mcols(object): protospacer, pam_site 
and pam.") return(out) } - + if (!is(df[["protospacer"]], "DNAStringSet")){ out <- "mcols(object)$protospacer must be a DNAStringSet object." return(out) @@ -189,7 +189,7 @@ setValidity("GuideSet", function(object){ out <- "mcols(object)$pam must be a DNAStringSet object." return(out) } - + meta <- metadata(object) mandatoryMetaFields <- c("CrisprNuclease", "targetOrigin") if (!all(mandatoryMetaFields %in% names(meta))){ @@ -197,7 +197,7 @@ setValidity("GuideSet", function(object){ " in metadata(object): CrisprNuclease and targetOrigin.") return(out) } - + targetOriginChoices <- c("bsgenome", "customSequences") target <- meta[["targetOrigin"]] if (length(target)!=1){ @@ -206,17 +206,17 @@ setValidity("GuideSet", function(object){ if (!target %in% targetOriginChoices){ stop("targetOrigin must be either 'bsgenome' or 'customSequences'.") } - + if (!target%in% names(meta)){ stop("When 'targetOrigin' is set to ", target, ", '", - target, "' must be specified in the metadata field.") + target, "' must be specified in the metadata field.") } if (target=="bsgenome"){ .isBSGenome(BSgenome::getBSgenome(meta[["bsgenome"]])) } else if (target=="customSequences"){ .isDNAStringSet(meta[["customSequences"]]) } - + nuc <- meta[["CrisprNuclease"]] if (!is(nuc, "CrisprNuclease")){ out <- "metadata(object)$CrisprNuclease must be a CrisprNuclease object" @@ -234,10 +234,10 @@ setValidity("GuideSet", function(object){ #' @param object \linkS4class{GuideSet} object. #' @export setMethod("targetOrigin", "GuideSet", - function(object){ - out <- metadata(object)[["targetOrigin"]] - return(out) -}) + function(object){ + out <- metadata(object)[["targetOrigin"]] + return(out) + }) @@ -245,10 +245,10 @@ setMethod("targetOrigin", "GuideSet", #' @param object \linkS4class{GuideSet} object. 
#' @export setMethod("customSequences", "GuideSet", - function(object){ - out <- metadata(object)[["customSequences"]] - return(out) -}) + function(object){ + out <- metadata(object)[["customSequences"]] + return(out) + }) #' @rdname GuideSet-class @@ -256,14 +256,14 @@ setMethod("customSequences", "GuideSet", #' @importFrom BSgenome getBSgenome #' @export setMethod("bsgenome", "GuideSet", - function(object){ - out <- metadata(object)[["bsgenome"]] - if (!is.character(out)){ - out <- .bsgenome_pkgname(out) - } - out <- BSgenome::getBSgenome(out) - return(out) -}) + function(object){ + out <- metadata(object)[["bsgenome"]] + if (!is.character(out)){ + out <- .bsgenome_pkgname(out) + } + out <- BSgenome::getBSgenome(out) + return(out) + }) @@ -279,10 +279,10 @@ setMethod("bsgenome", "GuideSet", #' @param object \linkS4class{GuideSet} object. #' @export setMethod("crisprNuclease", "GuideSet", - function(object){ - out <- metadata(object)[["CrisprNuclease"]] - return(out) -}) + function(object){ + out <- metadata(object)[["CrisprNuclease"]] + return(out) + }) #' @rdname GuideSet-class @@ -296,50 +296,50 @@ setMethod("crisprNuclease", "GuideSet", #' @importFrom Biostrings reverseComplement #' @importFrom Biostrings RNAStringSet setMethod("spacers", "GuideSet", - function(object, - as.character=FALSE, - returnAsRna=FALSE){ - out <- mcols(object)[["protospacer"]] - if (returnAsRna){ - out <- RNAStringSet(out) - } - if (isRnase(crisprNuclease(object))){ - out <- reverseComplement(out) - } - if (as.character){ - out <- as.character(out) - } - names(out) <- names(object) - return(out) -}) + function(object, + as.character=FALSE, + returnAsRna=FALSE){ + out <- mcols(object)[["protospacer"]] + if (returnAsRna){ + out <- RNAStringSet(out) + } + if (isRnase(crisprNuclease(object))){ + out <- reverseComplement(out) + } + if (as.character){ + out <- as.character(out) + } + names(out) <- names(object) + return(out) + }) #' @rdname GuideSet-class #' @export setMethod("pams", 
"GuideSet", - function(object, - as.character=FALSE, - returnAsRna=FALSE){ - out <- mcols(object)[["pam"]] - if (returnAsRna){ - out <- RNAStringSet(out) - } - if (as.character){ - out <- as.character(out) - } - names(out) <- names(object) - return(out) -}) + function(object, + as.character=FALSE, + returnAsRna=FALSE){ + out <- mcols(object)[["pam"]] + if (returnAsRna){ + out <- RNAStringSet(out) + } + if (as.character){ + out <- as.character(out) + } + names(out) <- names(object) + return(out) + }) #' @rdname GuideSet-class #' @export setMethod("pamSites", "GuideSet", - function(object){ - out <- mcols(object)[["pam_site"]] - names(out) <- names(object) - return(out) -}) + function(object){ + out <- mcols(object)[["pam_site"]] + names(out) <- names(object) + return(out) + }) @@ -349,19 +349,19 @@ setMethod("pamSites", "GuideSet", #' @importFrom BiocGenerics strand #' @importFrom crisprBase getCutSiteFromPamSite setMethod("cutSites", "GuideSet", - function(object){ - pamSites <- mcols(object)[["pam_site"]] - nuc <- metadata(object)[["CrisprNuclease"]] - strand <- as.character(strand(object)) - ambiguousStrand <- strand == "*" - out <- rep(NA, length(object)) - out[!ambiguousStrand] <- getCutSiteFromPamSite( - pam_site=pamSites[!ambiguousStrand], - strand=strand[!ambiguousStrand], - nuclease=nuc) - names(out) <- names(object) - return(out) -}) + function(object){ + pamSites <- mcols(object)[["pam_site"]] + nuc <- metadata(object)[["CrisprNuclease"]] + strand <- as.character(strand(object)) + ambiguousStrand <- strand == "*" + out <- rep(NA, length(object)) + out[!ambiguousStrand] <- getCutSiteFromPamSite( + pam_site=pamSites[!ambiguousStrand], + strand=strand[!ambiguousStrand], + nuclease=nuc) + names(out) <- names(object) + return(out) + }) #' @rdname GuideSet-class @@ -370,19 +370,19 @@ setMethod("cutSites", "GuideSet", #' @importFrom crisprBase getCutSiteFromPamSite #' @importFrom S4Vectors mcols<- setMethod("addCutSites", "GuideSet", - function(object){ - - 
nuclease <- crisprNuclease(object) - strand <- as.character(BiocGenerics::strand(object)) - ambiguousStrand <- strand == "*" - cutSite <- rep(NA, length(object)) - cutSite[!ambiguousStrand] <- getCutSiteFromPamSite( - pam_site=pamSites(object)[!ambiguousStrand], - strand=strand[!ambiguousStrand], - nuclease=nuclease) - mcols(object)[["cut_site"]] <- cutSite - return(object) -}) + function(object){ + + nuclease <- crisprNuclease(object) + strand <- as.character(BiocGenerics::strand(object)) + ambiguousStrand <- strand == "*" + cutSite <- rep(NA, length(object)) + cutSite[!ambiguousStrand] <- getCutSiteFromPamSite( + pam_site=pamSites(object)[!ambiguousStrand], + strand=strand[!ambiguousStrand], + nuclease=nuclease) + mcols(object)[["cut_site"]] <- cutSite + return(object) + }) @@ -395,25 +395,25 @@ setMethod("addCutSites", "GuideSet", #' FALSE by default. #' @export setMethod("protospacers", "GuideSet", - function(object, - as.character=FALSE, - include.pam=FALSE, - returnAsRna=FALSE){ - out <- mcols(object)[["protospacer"]] - if (include.pam){ - pams <- pams(object) - out <- paste0(out, pams) - } - out <- DNAStringSet(out) - if (returnAsRna){ - out <- RNAStringSet(out) - } - if (as.character){ - out <- as.character(out) - } - names(out) <- names(object) - return(out) -}) + function(object, + as.character=FALSE, + include.pam=FALSE, + returnAsRna=FALSE){ + out <- mcols(object)[["protospacer"]] + if (include.pam){ + pams <- pams(object) + out <- paste0(out, pams) + } + out <- DNAStringSet(out) + if (returnAsRna){ + out <- RNAStringSet(out) + } + if (as.character){ + out <- as.character(out) + } + names(out) <- names(object) + return(out) + }) @@ -422,11 +422,11 @@ setMethod("protospacers", "GuideSet", #' @export #' @importFrom crisprBase spacerLength setMethod("spacerLength", "GuideSet", - function(object){ - nuc <- metadata(object)$CrisprNuclease - out <- spacerLength(nuc) - return(out) -}) + function(object){ + nuc <- metadata(object)$CrisprNuclease + out <- 
spacerLength(nuc) + return(out) + }) @@ -436,11 +436,11 @@ setMethod("spacerLength", "GuideSet", #' @export #' @importFrom crisprBase prototypeSequence setMethod("prototypeSequence", "GuideSet", - function(object){ - nuc <- metadata(object)$CrisprNuclease - out <- prototypeSequence(nuc) - return(out) -}) + function(object){ + nuc <- metadata(object)$CrisprNuclease + out <- prototypeSequence(nuc) + return(out) + }) @@ -448,21 +448,21 @@ setMethod("prototypeSequence", "GuideSet", #' @export #' @importFrom crisprBase pamLength setMethod("pamLength", "GuideSet", - function(object){ - nuc <- metadata(object)$CrisprNuclease - out <- pamLength(nuc) - return(out) -}) + function(object){ + nuc <- metadata(object)$CrisprNuclease + out <- pamLength(nuc) + return(out) + }) #' @rdname GuideSet-class #' @export #' @importFrom crisprBase pamSide setMethod("pamSide", "GuideSet", - function(object){ - nuc <- metadata(object)$CrisprNuclease - out <- pamSide(nuc) - return(out) -}) + function(object){ + nuc <- metadata(object)$CrisprNuclease + out <- pamSide(nuc) + return(out) + }) @@ -478,26 +478,26 @@ setMethod("pamSide", "GuideSet", #' @importFrom BiocGenerics unlist rownames #' @export setMethod("snps", "GuideSet", - function(object, - unlist=TRUE, - use.names=TRUE){ - if (!"snps" %in% colnames(S4Vectors::mcols(object))){ - out <- NULL - } else { - out <- S4Vectors::mcols(object)[["snps"]] - out <- BiocGenerics::unlist(out, use.names=FALSE) - if (!use.names){ - out <- .namesAsColumn_df(out) - split_factor <- out[["spacer_id"]] - } else { - split_factor <- BiocGenerics::rownames(out) - } - if (!unlist){ - out <- S4Vectors::split(out, f=split_factor) - } - } - return(out) -}) + function(object, + unlist=TRUE, + use.names=TRUE){ + if (!"snps" %in% colnames(S4Vectors::mcols(object))){ + out <- NULL + } else { + out <- S4Vectors::mcols(object)[["snps"]] + out <- BiocGenerics::unlist(out, use.names=FALSE) + if (!use.names){ + out <- .namesAsColumn_df(out) + split_factor <- 
out[["spacer_id"]] + } else { + split_factor <- BiocGenerics::rownames(out) + } + if (!unlist){ + out <- S4Vectors::split(out, f=split_factor) + } + } + return(out) + }) #' @rdname GuideSet-class @@ -507,26 +507,26 @@ setMethod("snps", "GuideSet", #' @importFrom BiocGenerics unlist #' @export setMethod("alignments", "GuideSet", - function(object, - columnName="alignments", - unlist=TRUE, - use.names=TRUE){ - if (!columnName %in% colnames(S4Vectors::mcols(object)) || - !.isAlignmentsColumn(object, columnName)){ - out <- NULL - } else { - out <- S4Vectors::mcols(object)[[columnName]] - out <- BiocGenerics::unlist(out, use.names=FALSE) - if (!unlist){ - split_factor <- factor(names(out), levels=names(object)) - if (!use.names){ - out <- .namesAsColumn_gr(out) - } - out <- S4Vectors::split(out, f=split_factor)[names(object)] - } - } - return(out) -}) + function(object, + columnName="alignments", + unlist=TRUE, + use.names=TRUE){ + if (!columnName %in% colnames(S4Vectors::mcols(object)) || + !.isAlignmentsColumn(object, columnName)){ + out <- NULL + } else { + out <- S4Vectors::mcols(object)[[columnName]] + out <- BiocGenerics::unlist(out, use.names=FALSE) + if (!unlist){ + split_factor <- factor(names(out), levels=names(object)) + if (!use.names){ + out <- .namesAsColumn_gr(out) + } + out <- S4Vectors::split(out, f=split_factor)[names(object)] + } + } + return(out) + }) @@ -537,27 +537,27 @@ setMethod("alignments", "GuideSet", #' @importFrom BiocGenerics unlist #' @export setMethod("onTargets", "GuideSet", - function(object, - columnName="alignments", - unlist=TRUE, - use.names=TRUE){ - if (!columnName %in% colnames(S4Vectors::mcols(object)) || - !.isAlignmentsColumn(object, columnName)){ - out <- NULL - } else { - out <- S4Vectors::mcols(object)[[columnName]] - out <- BiocGenerics::unlist(out, use.names=FALSE) - out <- out[out$n_mismatches == 0] - if (!unlist){ - split_factor <- factor(names(out), levels=names(object)) - if (!use.names){ - out <- 
.namesAsColumn_gr(out) - } - out <- S4Vectors::split(out, f=split_factor)[names(object)] - } - } - return(out) -}) + function(object, + columnName="alignments", + unlist=TRUE, + use.names=TRUE){ + if (!columnName %in% colnames(S4Vectors::mcols(object)) || + !.isAlignmentsColumn(object, columnName)){ + out <- NULL + } else { + out <- S4Vectors::mcols(object)[[columnName]] + out <- BiocGenerics::unlist(out, use.names=FALSE) + out <- out[out$n_mismatches == 0] + if (!unlist){ + split_factor <- factor(names(out), levels=names(object)) + if (!use.names){ + out <- .namesAsColumn_gr(out) + } + out <- S4Vectors::split(out, f=split_factor)[names(object)] + } + } + return(out) + }) #' @rdname GuideSet-class @@ -568,35 +568,35 @@ setMethod("onTargets", "GuideSet", #' @importFrom BiocGenerics unlist #' @export setMethod("offTargets", "GuideSet", - function(object, - columnName="alignments", - max_mismatches=Inf, - unlist=TRUE, - use.names=TRUE){ - - stopifnot("max_mismatches must be a non-negative integer" = { - is.vector(max_mismatches, mode="numeric") && - length(max_mismatches) == 1 && - max_mismatches == round(max_mismatches) && - max_mismatches >= 0 - }) - if (!columnName %in% colnames(S4Vectors::mcols(object)) || - !.isAlignmentsColumn(object, columnName)){ - out <- NULL - } else { - out <- S4Vectors::mcols(object)[[columnName]] - out <- BiocGenerics::unlist(out, use.names=FALSE) - out <- out[out$n_mismatches > 0 & out$n_mismatches <= max_mismatches] - if (!unlist){ - split_factor <- factor(names(out), levels=names(object)) - if (!use.names){ - out <- .namesAsColumn_gr(out) - } - out <- S4Vectors::split(out, f=split_factor)[names(object)] - } - } - return(out) -}) + function(object, + columnName="alignments", + max_mismatches=Inf, + unlist=TRUE, + use.names=TRUE){ + + stopifnot("max_mismatches must be a non-negative integer" = { + is.vector(max_mismatches, mode="numeric") && + length(max_mismatches) == 1 && + max_mismatches == round(max_mismatches) && + max_mismatches >= 
0 + }) + if (!columnName %in% colnames(S4Vectors::mcols(object)) || + !.isAlignmentsColumn(object, columnName)){ + out <- NULL + } else { + out <- S4Vectors::mcols(object)[[columnName]] + out <- BiocGenerics::unlist(out, use.names=FALSE) + out <- out[out$n_mismatches > 0 & out$n_mismatches <= max_mismatches] + if (!unlist){ + split_factor <- factor(names(out), levels=names(object)) + if (!use.names){ + out <- .namesAsColumn_gr(out) + } + out <- S4Vectors::split(out, f=split_factor)[names(object)] + } + } + return(out) + }) @@ -633,43 +633,52 @@ setMethod("offTargets", "GuideSet", #' @param value Object to replace with #' @export setMethod("alignments<-", "GuideSet", - function(object, value){ - mcols(object)[["alignments"]] <- value - return(object) -}) + function(object, value){ + mcols(object)[["alignments"]] <- value + return(object) + }) #' @rdname GuideSet-class #' @export setMethod("geneAnnotation<-", "GuideSet", - function(object, value){ - mcols(object)[["geneAnnotation"]] <- value - return(object) -}) + function(object, value){ + mcols(object)[["geneAnnotation"]] <- value + return(object) + }) #' @rdname GuideSet-class #' @export setMethod("tssAnnotation<-", "GuideSet", - function(object, value){ - mcols(object)[["tssAnnotation"]] <- value - return(object) -}) + function(object, value){ + mcols(object)[["tssAnnotation"]] <- value + return(object) + }) #' @rdname GuideSet-class #' @export setMethod("enzymeAnnotation<-", "GuideSet", - function(object, value){ - mcols(object)[["enzymeAnnotation"]] <- value - return(object) -}) + function(object, value){ + mcols(object)[["enzymeAnnotation"]] <- value + return(object) + }) #' @rdname GuideSet-class #' @export setMethod("snps<-", "GuideSet", - function(object, value){ - mcols(object)[["snps"]] <- value - return(object) -}) + function(object, value){ + mcols(object)[["snps"]] <- value + return(object) + }) + + +# #' @rdname GuideSet-class +# #' @export +# setMethod("txTable<-", "GuideSet", +# function(object, 
value){ +# mcols(object)[["txTable"]] <- value +# return(object) +# }) @@ -693,46 +702,46 @@ setMethod("snps<-", "GuideSet", #' @importFrom BiocGenerics unlist rownames #' @export setMethod("geneAnnotation", "GuideSet", - function(object, - unlist=TRUE, - gene_id=NULL, - tx_id=NULL, - gene_symbol=NULL, - use.names=TRUE){ - if (!"geneAnnotation" %in% colnames(S4Vectors::mcols(object))){ - out <- NULL - } else { - out <- S4Vectors::mcols(object)[["geneAnnotation"]] - out <- BiocGenerics::unlist(out, use.names=FALSE) - if (is.null(gene_id)){ - gene_id <- unique(out$gene_id) - } - if (is.null(tx_id)){ - tx_id <- unique(out$tx_id) - } - if (is.null(gene_symbol)){ - gene_symbol <- unique(out$gene_symbol) - } - cols <- c("gene_id", "tx_id", "gene_symbol") - cols <- intersect(cols, colnames(out)) - whs <- lapply(cols, function(col){ - out[[col]] %in% get(col) - }) - wh <- Reduce("&", whs) - out <- out[wh, , drop=FALSE] - - if (!use.names){ - out <- .namesAsColumn_df(out) - split_factor <- out[["spacer_id"]] - } else { - split_factor <- BiocGenerics::rownames(out) - } - if (!unlist){ - out <- S4Vectors::split(out, f=split_factor) - } - } - return(out) -}) + function(object, + unlist=TRUE, + gene_id=NULL, + tx_id=NULL, + gene_symbol=NULL, + use.names=TRUE){ + if (!"geneAnnotation" %in% colnames(S4Vectors::mcols(object))){ + out <- NULL + } else { + out <- S4Vectors::mcols(object)[["geneAnnotation"]] + out <- BiocGenerics::unlist(out, use.names=FALSE) + if (is.null(gene_id)){ + gene_id <- unique(out$gene_id) + } + if (is.null(tx_id)){ + tx_id <- unique(out$tx_id) + } + if (is.null(gene_symbol)){ + gene_symbol <- unique(out$gene_symbol) + } + cols <- c("gene_id", "tx_id", "gene_symbol") + cols <- intersect(cols, colnames(out)) + whs <- lapply(cols, function(col){ + out[[col]] %in% get(col) + }) + wh <- Reduce("&", whs) + out <- out[wh, , drop=FALSE] + + if (!use.names){ + out <- .namesAsColumn_df(out) + split_factor <- out[["spacer_id"]] + } else { + split_factor <- 
BiocGenerics::rownames(out) + } + if (!unlist){ + out <- S4Vectors::split(out, f=split_factor) + } + } + return(out) + }) @@ -741,15 +750,30 @@ setMethod("geneAnnotation", "GuideSet", #' @importFrom S4Vectors mcols #' @export setMethod("editedAlleles", "GuideSet", - function(object){ - if (!"editedAlleles" %in% colnames(S4Vectors::mcols(object))){ - out <- NULL - } else { - out <- S4Vectors::mcols(object)[["editedAlleles"]] - names(out) <- names(object) - } - return(out) -}) + function(object, + unlist=TRUE, + use.names=TRUE){ + if (!"editedAlleles" %in% colnames(S4Vectors::mcols(object))){ + out <- NULL + message("Edited alleles annotation has not been added yet. ", + "See the function 'addEditedAlleles' to add ", + "edited alleles annotation.") + } else { + out <- S4Vectors::mcols(object)[["editedAlleles"]] + out <- do.call(rbind, out) + # out <- BiocGenerics::unlist(out, use.names=FALSE) + if (!use.names){ + out <- .namesAsColumn_df(out) + split_factor <- out[["spacer_id"]] + } else { + split_factor <- BiocGenerics::rownames(out) + } + if (!unlist){ + out <- S4Vectors::split(out, f=split_factor) + } + } + return(out) + }) @@ -759,41 +783,41 @@ setMethod("editedAlleles", "GuideSet", #' @importFrom BiocGenerics unlist rownames #' @export setMethod("tssAnnotation", "GuideSet", - function(object, - unlist=TRUE, - gene_id=NULL, - gene_symbol=NULL, - use.names=TRUE){ - if (!"tssAnnotation" %in% colnames(S4Vectors::mcols(object))){ - out <- NULL - } else { - out <- S4Vectors::mcols(object)[["tssAnnotation"]] - out <- BiocGenerics::unlist(out, use.names=FALSE) - if (is.null(gene_id)){ - gene_id <- unique(out$gene_id) - } - if (is.null(gene_symbol)){ - gene_symbol <- unique(out$gene_symbol) - } - cols <- c("gene_id", "gene_symbol") - whs <- lapply(cols, function(col){ - out[[col]] %in% get(col) - }) - wh <- Reduce("&", whs) - out <- out[wh, , drop=FALSE] - - if (!use.names){ - out <- .namesAsColumn_df(out) - split_factor <- out[["spacer_id"]] - } else { - split_factor <- 
BiocGenerics::rownames(out) - } - if (!unlist){ - out <- S4Vectors::split(out, f=split_factor) - } - } - return(out) -}) + function(object, + unlist=TRUE, + gene_id=NULL, + gene_symbol=NULL, + use.names=TRUE){ + if (!"tssAnnotation" %in% colnames(S4Vectors::mcols(object))){ + out <- NULL + } else { + out <- S4Vectors::mcols(object)[["tssAnnotation"]] + out <- BiocGenerics::unlist(out, use.names=FALSE) + if (is.null(gene_id)){ + gene_id <- unique(out$gene_id) + } + if (is.null(gene_symbol)){ + gene_symbol <- unique(out$gene_symbol) + } + cols <- c("gene_id", "gene_symbol") + whs <- lapply(cols, function(col){ + out[[col]] %in% get(col) + }) + wh <- Reduce("&", whs) + out <- out[wh, , drop=FALSE] + + if (!use.names){ + out <- .namesAsColumn_df(out) + split_factor <- out[["spacer_id"]] + } else { + split_factor <- BiocGenerics::rownames(out) + } + if (!unlist){ + out <- S4Vectors::split(out, f=split_factor) + } + } + return(out) + }) @@ -802,29 +826,98 @@ setMethod("tssAnnotation", "GuideSet", #' @importFrom BiocGenerics unlist rownames #' @export setMethod("enzymeAnnotation", "GuideSet", - function(object, - unlist=TRUE, - use.names=TRUE){ - if (!"enzymeAnnotation" %in% colnames(S4Vectors::mcols(object))){ - out <- NULL - message("An enzymeAnnotation is not added yet. See ", - "the function 'addRestrictionEnzymes' to add ", - "enzyme annotation") - } else { - out <- S4Vectors::mcols(object)[["enzymeAnnotation"]] - out <- BiocGenerics::unlist(out, use.names=FALSE) - if (!use.names){ - out <- .namesAsColumn_df(out) - split_factor <- out[["spacer_id"]] - } else { - split_factor <- BiocGenerics::rownames(out) - } - if (!unlist){ - out <- S4Vectors::split(out, f=split_factor) - } - } - return(out) -}) + function(object, + unlist=TRUE, + use.names=TRUE){ + if (!"enzymeAnnotation" %in% colnames(S4Vectors::mcols(object))){ + out <- NULL + message("An enzymeAnnotation is not added yet. 
See ", + "the function 'addRestrictionEnzymes' to add ", + "enzyme annotation") + } else { + out <- S4Vectors::mcols(object)[["enzymeAnnotation"]] + out <- BiocGenerics::unlist(out, use.names=FALSE) + if (!use.names){ + out <- .namesAsColumn_df(out) + split_factor <- out[["spacer_id"]] + } else { + split_factor <- BiocGenerics::rownames(out) + } + if (!unlist){ + out <- S4Vectors::split(out, f=split_factor) + } + } + return(out) + }) + + + + +#' @rdname GuideSet-class +#' @importFrom S4Vectors mcols split +#' @importFrom BiocGenerics unlist rownames +#' @export +setMethod("txTable", "GuideSet", + function(object, + unlist=TRUE, + use.names=TRUE){ + if (!"txTable" %in% colnames(S4Vectors::mcols(object))){ + out <- NULL + message("A txTable has not been added yet. See ", + "the function 'addTxTable' to add ", + "a txTable.") + } else { + out <- S4Vectors::mcols(object)[["txTable"]] + out <- BiocGenerics::unlist(out, use.names=FALSE) + if (!use.names){ + out <- .namesAsColumn_df(out) + split_factor <- out[["spacer_id"]] + } else { + split_factor <- BiocGenerics::rownames(out) + } + if (!unlist){ + out <- S4Vectors::split(out, f=split_factor) + } + } + return(out) + } +) + + + + +#' @rdname GuideSet-class +#' @importFrom S4Vectors mcols split +#' @importFrom BiocGenerics unlist rownames +#' @export +setMethod("exonTable", "GuideSet", + function(object, + unlist=TRUE, + use.names=TRUE){ + if (!"exonTable" %in% colnames(S4Vectors::mcols(object))){ + out <- NULL + message("An exonTable has not been added yet. 
See ", + "the function 'addExonTable' to add ", + "an exonTable.") + } else { + out <- S4Vectors::mcols(object)[["exonTable"]] + out <- BiocGenerics::unlist(out, use.names=FALSE) + if (!use.names){ + out <- .namesAsColumn_df(out) + split_factor <- out[["spacer_id"]] + } else { + split_factor <- BiocGenerics::rownames(out) + } + if (!unlist){ + out <- S4Vectors::split(out, f=split_factor) + } + } + return(out) + } +) + + + diff --git a/R/GuideSet2DataFrames.R b/R/GuideSet2DataFrames.R index 95e52d2..2c78299 100644 --- a/R/GuideSet2DataFrames.R +++ b/R/GuideSet2DataFrames.R @@ -70,7 +70,7 @@ flattenGuideSet <- function(guideSet, #' \item \code{snps} SNP annotation table (human only). #' } #' -#' @author Jean-Philippe Fortin +#' @author Jean-Philippe Fortin, Luke Hoberecht #' #' #' @export @@ -93,11 +93,15 @@ GuideSet2DataFrames <- function(guideSet, cols <- c("alignments", "geneAnnotation", "tssAnnotation", - "enzymeAnnotation", "snps") + "enzymeAnnotation", + "snps", + "txTable", + "exonTable", + "editedAlleles") cols <- intersect(cols, colnames(mcols(guideSet))) secondaryTables <- lapply(cols, function(col){ .getSecondaryTable(guideSet, - col, + colname=col, useSpacerCoordinates=useSpacerCoordinates) }) names(secondaryTables) <- cols @@ -128,7 +132,6 @@ GuideSet2DataFrames <- function(guideSet, tab[["ID"]] <- rownames(tab) } rownames(tab) <- NULL - tab <- .putColumnFirst("chr", tab) if (is(guideSet, "GuideSet")){ @@ -142,7 +145,8 @@ GuideSet2DataFrames <- function(guideSet, return(tab) } -#' @importFrom GenomeInfoDb seqnames + +#' @importFrom GenomeInfoDb seqnames genome #' @importFrom BiocGenerics start end strand #' @importFrom crisprBase getProtospacerRanges .getIrangesTable <- function(guideSet, @@ -150,15 +154,23 @@ GuideSet2DataFrames <- function(guideSet, nuclease=NULL ){ out <- data.frame(chr=as.character(GenomeInfoDb::seqnames(guideSet))) - if (!useSpacerCoordinates){ - out$start <- as.integer(BiocGenerics::start(guideSet)) - out$end <- 
as.integer(BiocGenerics::end(guideSet)) - } else { - protospacerRanges <- getProtospacerRanges(gr=guideSet, - nuclease=nuclease) - out$start <- as.integer(BiocGenerics::start(protospacerRanges)) - out$end <- as.integer(BiocGenerics::end(protospacerRanges)) + if (useSpacerCoordinates){ + genome <- GenomeInfoDb::genome(guideSet) + if (any(genome != "ntc")){ + validSeqnames <- names(genome[genome != "ntc"]) + protospacers <- as.vector(GenomeInfoDb::seqnames(guideSet)) %in% + validSeqnames + if (any(protospacers)){ + gr <- guideSet[protospacers] + protospacerRanges <- crisprBase::getProtospacerRanges( + gr=gr, + nuclease=nuclease) + guideSet[protospacers] <- protospacerRanges + } + } } + out$start <- as.integer(BiocGenerics::start(guideSet)) + out$end <- as.integer(BiocGenerics::end(guideSet)) out$strand <- as.character(BiocGenerics::strand(guideSet)) return(out) } @@ -169,14 +181,14 @@ GuideSet2DataFrames <- function(guideSet, .getMcolsTable_flat <- function(guideSet){ meta <- S4Vectors::mcols(guideSet) coltypes <- .getDFColtypes(meta) - wh <- which(coltypes=="DNAStringSet") + wh <- which(coltypes == "DNAStringSet") for (k in seq_along(wh)){ - meta[,wh[k]] <- as.character(meta[,wh[k]]) + meta[, wh[k]] <- as.character(meta[, wh[k]]) } coltypes <- .getDFColtypes(meta) wh <- which(coltypes %in% .coltypes_flat) - if (length(wh)>0){ - meta <- meta[,wh,drop=FALSE] + if (length(wh) > 0){ + meta <- meta[, wh, drop=FALSE] } else { meta <- NULL } @@ -192,25 +204,20 @@ GuideSet2DataFrames <- function(guideSet, useSpacerCoordinates=TRUE ){ nuclease <- crisprNuclease(guideSet) - if (colname=="alignments"){ - out <- crisprDesign::alignments(guideSet, - unlist=TRUE) - } else if (colname=="geneAnnotation"){ - out <- crisprDesign::geneAnnotation(guideSet, - unlist=TRUE) - } else if (colname=="tssAnnotation"){ - out <- crisprDesign::tssAnnotation(guideSet, - unlist=TRUE) - } else if (colname=="snps"){ - out <- crisprDesign::snps(guideSet, - unlist=TRUE) - } else if 
(colname=="enzymeAnnotation"){ - out <- crisprDesign::enzymeAnnotation(guideSet, - unlist=TRUE) - } else { - stop("colname not found in colnames(mcols(guideset)).") - } - if (is(out, "GRanges")){ + out <- switch(colname, + "alignments"=alignments(guideSet, unlist=TRUE), + "geneAnnotation"=geneAnnotation(guideSet, unlist=TRUE), + "tssAnnotation"=tssAnnotation(guideSet, unlist=TRUE), + "snps"=snps(guideSet, unlist=TRUE), + "enzymeAnnotation"=enzymeAnnotation(guideSet, unlist=TRUE), + "txTable"=txTable(guideSet, unlist=TRUE), + "exonTable"=exonTable(guideSet, unlist=TRUE), + "editedAlleles"=editedAlleles(guideSet, unlist=TRUE), + NULL) + stopifnot("colname not found in colnames(mcols(guideset))." = { + !is.null(out) + }) + if (methods::is(out, "GRanges")){ out$ID <- names(out) out <- .getPrimaryTable(out, useSpacerCoordinates=useSpacerCoordinates, @@ -238,7 +245,7 @@ GuideSet2DataFrames <- function(guideSet, .putColumnFirst <- function(col, df){ cols <- c(col, setdiff(colnames(df), col)) - df <- df[,cols, drop=FALSE] + df <- df[, cols, drop=FALSE] return(df) } @@ -246,9 +253,9 @@ GuideSet2DataFrames <- function(guideSet, .safeFormatColumns <- function(df){ cols <- intersect(.coltypes_integer, colnames(df)) - if (length(cols)>0){ + if (length(cols) > 0){ for (k in seq_along(cols)){ - df[,cols[k]] <- as.integer(df[,cols[k]]) + df[, cols[k]] <- as.integer(df[, cols[k]]) } } return(df) @@ -258,8 +265,8 @@ GuideSet2DataFrames <- function(guideSet, .getDFColtypes <- function(df){ types <- vapply(seq_len(ncol(df)), function(i){ - class(df[,i]) - }, FUN.VALUE="a") + class(df[, i]) + }, FUN.VALUE=character(1)) return(types) } @@ -276,4 +283,3 @@ cols_aln <- c(cols_aln, cols_aln_c, cols_aln_p) "tss_pos", "anchor_site", "dist_to_tss") .coltypes_integer <- c(.coltypes_integer, cols_aln) .coltypes_flat <- c("numeric", "logical", "integer", "character") - diff --git a/R/addEditedAlleles.R b/R/addEditedAlleles.R index cc6290d..050c8d5 100644 --- a/R/addEditedAlleles.R +++ 
b/R/addEditedAlleles.R @@ -84,8 +84,7 @@ addEditedAlleles <- function(guideSet, genome <- GenomeInfoDb::genome(guideSet[guide]) genome <- genome[seqname] if (genome == "ntc"){ - S4Vectors::DataFrame(seq=DNAStringSet(character(0)), - score=numeric(0)) + .getEditedAlleles_ntc() } else { .getEditedAllelesPerGuide(gs=guideSet[guide], baseEditor=baseEditor, @@ -102,6 +101,7 @@ addEditedAlleles <- function(guideSet, .addFunctionalConsequences, txTable) } + names(alleles) <- names(guideSet) mcols(guideSet)[["editedAlleles"]] <- alleles if (addSummary){ @@ -161,6 +161,23 @@ addEditedAlleles <- function(guideSet, +.getEditedAlleles_ntc <- function(){ + df <- S4Vectors::DataFrame(seq=DNAStringSet(character(0)), + score=numeric(0), + row.names=character(0)) + metadata(df)$wildtypeAllele <- NA_character_ + metadata(df)$start <- NA_real_ + metadata(df)$end <- NA_real_ + metadata(df)$chr <- NA_character_ + metadata(df)$strand <- NA_character_ + metadata(df)$editingWindow <- NA_real_ + metadata(df)$wildtypeAmino <- NA_character_ + return(df) +} + + + + # Get the set of predicted edited alleles for each gRNA #' @importFrom crisprBase editingStrand .getEditedAllelesPerGuide <- function(gs, @@ -319,6 +336,8 @@ addEditedAlleles <- function(guideSet, metadata(editedAlleles)$strand <- strand metadata(editedAlleles)$editingWindow <- editingWindow editedAlleles$seq <- DNAStringSet(editedAlleles$seq) + rownames(editedAlleles) <- rep(names(gs), nrow(editedAlleles)) + return(editedAlleles) } @@ -440,7 +459,7 @@ addEditedAlleles <- function(guideSet, stop("editedAlleles are not on the same chromosome.") } editedAlleles$variant <- "not_targeting" - txTable <- txTable[txTable$region=="CDS",,drop=FALSE] + txTable <- txTable[txTable$region == "CDS", , drop=FALSE] geneStrand <- metadata(txTable)$gene_strand guideStrand <- metadata(editedAlleles)$strand start <- metadata(editedAlleles)$start @@ -448,16 +467,17 @@ addEditedAlleles <- function(guideSet, editingPositions <- start:end overlapPositions 
<- editingPositions[editingPositions %in% txTable$pos] - if (length(overlapPositions)==0){ + if (length(overlapPositions) == 0){ + editedAlleles$aa <- NA_character_ return(editedAlleles) } # Getting nucleotide to replace sequences <- editedAlleles$seq - if (geneStrand!=guideStrand){ + if (geneStrand != guideStrand){ sequences <- complement(sequences) } - if (guideStrand=="-"){ + if (guideStrand == "-"){ sequences <- reverse(sequences) } nucs <- as.matrix(sequences) @@ -467,7 +487,7 @@ addEditedAlleles <- function(guideSet, # Get wildtype protein: wh <- match(overlapPositions, txTable$pos) - txTable <- txTable[order(txTable$pos_cds),,drop=FALSE] + txTable <- txTable[order(txTable$pos_cds), , drop=FALSE] nuc <- txTable$nuc protein <- translate(DNAString(paste0(nuc, collapse=""))) protein <- as.vector(protein) @@ -490,7 +510,7 @@ addEditedAlleles <- function(guideSet, } } return(effect) - }, FUN.VALUE="a") + }, FUN.VALUE=character(1)) aminos <- vapply(seq_len(nrow(nucs)), function(k){ editedNuc <- nuc @@ -503,7 +523,7 @@ addEditedAlleles <- function(guideSet, aas <- paste0(aas, collapse="") return(aas) - }, FUN.VALUE="a") + }, FUN.VALUE=character(1)) editedAlleles$variant <- effects editedAlleles$aa <- aminos diff --git a/R/addExonTable.R b/R/addExonTable.R index a4c59b0..f69a265 100644 --- a/R/addExonTable.R +++ b/R/addExonTable.R @@ -90,7 +90,11 @@ addExonTable_consensusIsoform <- function(guideSet, out[df[k,1],df[k,2]] <- 1 } out <- DataFrame(out) - mcols(guideSet)$exonTable <- out + splitFactor <- factor(BiocGenerics::rownames(out), + levels=names(guideSet)) + out <- S4Vectors::split(out, f=splitFactor) + S4Vectors::mcols(guideSet)[["exonTable"]] <- out + # mcols(guideSet)$exonTable <- out return(guideSet) } @@ -127,7 +131,11 @@ addExonTable_allIsoforms <- function(guideSet, out[df[k,1],df[k,2]] <- df[[valueColumn]][k] } out <- DataFrame(out) - mcols(guideSet)$exonTable <- out + splitFactor <- factor(BiocGenerics::rownames(out), + levels=names(guideSet)) + out <- 
S4Vectors::split(out, f=splitFactor) + S4Vectors::mcols(guideSet)[["exonTable"]] <- out + # mcols(guideSet)$exonTable <- out return(guideSet) } diff --git a/R/addTxTable.R b/R/addTxTable.R index 8617025..996bdab 100644 --- a/R/addTxTable.R +++ b/R/addTxTable.R @@ -39,8 +39,9 @@ #' #' @seealso \code{\link{addGeneAnnotation}} to add gene annotation. #' -#' @export #' @rdname addTxTable +#' @importFrom S4Vectors split mcols<- +#' @export addTxTable <- function(guideSet, gene_id, txObject, @@ -73,7 +74,10 @@ addTxTable <- function(guideSet, out[df[k,1],df[k,2]] <- df[[valueColumn]][k] } out <- DataFrame(out) - mcols(guideSet)$txTable <- out + splitFactor <- factor(BiocGenerics::rownames(out), + levels=names(guideSet)) + out <- S4Vectors::split(out, f=splitFactor) + S4Vectors::mcols(guideSet)[["txTable"]] <- out return(guideSet) } diff --git a/man/GuideSet-class.Rd b/man/GuideSet-class.Rd index 72c3293..1c8aaa7 100644 --- a/man/GuideSet-class.Rd +++ b/man/GuideSet-class.Rd @@ -17,6 +17,8 @@ \alias{tssAnnotation} \alias{enzymeAnnotation} \alias{editedAlleles} +\alias{txTable} +\alias{exonTable} \alias{tssAnnotation<-} \alias{geneAnnotation<-} \alias{enzymeAnnotation<-} @@ -52,6 +54,8 @@ \alias{editedAlleles,GuideSet-method} \alias{tssAnnotation,GuideSet-method} \alias{enzymeAnnotation,GuideSet-method} +\alias{txTable,GuideSet-method} +\alias{exonTable,GuideSet-method} \title{An S4 class to store CRISPR gRNA sequences with modular annotations.} \usage{ crisprNuclease(object, ...) @@ -84,6 +88,10 @@ enzymeAnnotation(object, ...) editedAlleles(object, ...) +txTable(object, ...) + +exonTable(object, ...) 
+ tssAnnotation(object) <- value geneAnnotation(object) <- value @@ -178,7 +186,7 @@ GuideSet( use.names = TRUE ) -\S4method{editedAlleles}{GuideSet}(object) +\S4method{editedAlleles}{GuideSet}(object, unlist = TRUE, use.names = TRUE) \S4method{tssAnnotation}{GuideSet}( object, @@ -189,6 +197,10 @@ GuideSet( ) \S4method{enzymeAnnotation}{GuideSet}(object, unlist = TRUE, use.names = TRUE) + +\S4method{txTable}{GuideSet}(object, unlist = TRUE, use.names = TRUE) + +\S4method{exonTable}{GuideSet}(object, unlist = TRUE, use.names = TRUE) } \arguments{ \item{object}{\linkS4class{GuideSet} object.} diff --git a/man/GuideSet2DataFrames.Rd b/man/GuideSet2DataFrames.Rd index 943a993..79d58f1 100644 --- a/man/GuideSet2DataFrames.Rd +++ b/man/GuideSet2DataFrames.Rd @@ -44,5 +44,5 @@ tables <- GuideSet2DataFrames(guideSetExampleFullAnnotation) } \author{ -Jean-Philippe Fortin +Jean-Philippe Fortin, Luke Hoberecht } diff --git a/tests/testthat/test-GuideSet2DataFrames.R b/tests/testthat/test-GuideSet2DataFrames.R new file mode 100644 index 0000000..5e55c11 --- /dev/null +++ b/tests/testthat/test-GuideSet2DataFrames.R @@ -0,0 +1,159 @@ +data("guideSetExample") +data("guideSetExampleFullAnnotation") + + +test_that("GuideSet2DataFrames returns expected output for basic GuideSet", { + expect_error(res1 <- GuideSet2DataFrames(guideSetExample, + useSpacerCoordinates=TRUE, + primaryOnly=FALSE), + regexp=NA) + expect_error(res2 <- GuideSet2DataFrames(guideSetExample, + useSpacerCoordinates=TRUE, + primaryOnly=TRUE), + regexp=NA) + expect_error(res3 <- GuideSet2DataFrames(guideSetExample, + useSpacerCoordinates=FALSE, + primaryOnly=TRUE), + regexp=NA) + expect_error(res4 <- GuideSet2DataFrames(guideSetExample, + useSpacerCoordinates=FALSE, + primaryOnly=FALSE), + regexp=NA) + expect_identical(res1, res2) + expect_identical(res3, res4) + expect_false(identical(res1, res3)) + expect_equal(names(res1), "primary") + expect_equal(names(res3), "primary") + expect_type(res1, "list") + 
expect_type(res3, "list") + expect_equal(nrow(res1[["primary"]]), length(guideSetExample)) + expect_equal(nrow(res3[["primary"]]), length(guideSetExample)) +}) + + +test_that("GuideSet2DataFrames handles fully-annotated GuideSets", { + expect_error(res1 <- GuideSet2DataFrames(guideSetExampleFullAnnotation, + useSpacerCoordinates=TRUE, + primaryOnly=FALSE), + regexp=NA) + expect_error(res2 <- GuideSet2DataFrames(guideSetExampleFullAnnotation, + useSpacerCoordinates=TRUE, + primaryOnly=TRUE), + regexp=NA) + expect_error(res3 <- GuideSet2DataFrames(guideSetExampleFullAnnotation, + useSpacerCoordinates=FALSE, + primaryOnly=TRUE), + regexp=NA) + expect_error(res4 <- GuideSet2DataFrames(guideSetExampleFullAnnotation, + useSpacerCoordinates=FALSE, + primaryOnly=FALSE), + regexp=NA) + expect_false(identical(res1, res2)) + expect_false(identical(res1, res3)) + expect_false(identical(res1, res4)) + expect_false(identical(res2, res3)) + expect_false(identical(res2, res4)) + expect_false(identical(res3, res4)) + expect_setequal(names(res1), c("primary", "alignments", "geneAnnotation", + "tssAnnotation", "enzymeAnnotation", "snps")) + expect_equal(names(res2), "primary") + expect_equal(names(res3), "primary") + expect_setequal(names(res4), c("primary", "alignments", "geneAnnotation", + "tssAnnotation", "enzymeAnnotation", "snps")) + expect_type(res1, "list") + expect_type(res2, "list") + expect_type(res3, "list") + expect_type(res4, "list") + expect_equal(nrow(res1[["primary"]]), length(guideSetExampleFullAnnotation)) + expect_equal(nrow(res2[["primary"]]), length(guideSetExampleFullAnnotation)) + expect_equal(nrow(res3[["primary"]]), length(guideSetExampleFullAnnotation)) + expect_equal(nrow(res4[["primary"]]), length(guideSetExampleFullAnnotation)) + + ## add tests for txTable, exonTable, editedAlleles +}) + + + +test_that("GuideSet2DataFrames handles basic GuideSets with NTCs", { + ntcs <- c("ntc_1"=paste0(rep("A", spacerLength(guideSetExample)), + collapse="")) + gs <- 
addNtcs(guideSetExample, ntcs) + + expect_error(res1 <- GuideSet2DataFrames(gs, + useSpacerCoordinates=TRUE, + primaryOnly=FALSE), + regexp=NA) + expect_error(res2 <- GuideSet2DataFrames(gs, + useSpacerCoordinates=TRUE, + primaryOnly=TRUE), + regexp=NA) + expect_error(res3 <- GuideSet2DataFrames(gs, + useSpacerCoordinates=FALSE, + primaryOnly=TRUE), + regexp=NA) + expect_error(res4 <- GuideSet2DataFrames(gs, + useSpacerCoordinates=FALSE, + primaryOnly=FALSE), + regexp=NA) + expect_identical(res1, res2) + expect_identical(res3, res4) + expect_false(identical(res1, res3)) + expect_equal(names(res1), "primary") + expect_equal(names(res3), "primary") + expect_type(res1, "list") + expect_type(res3, "list") + expect_equal(nrow(res1[["primary"]]), length(gs)) + expect_equal(nrow(res3[["primary"]]), length(gs)) + + ntc_index <- which(res1$primary$ID == "ntc_1") + expect_equal(res1$primary$start[ntc_index], 0) + expect_equal(res1$primary$end[ntc_index], 0) +}) + + + +test_that("GuideSet2DataFrames handles fully-annotated GuideSets with NTCs", { + ntcs <- c("ntc_1"=paste0( + rep("A", spacerLength(guideSetExampleFullAnnotation)), + collapse="")) + gs <- addNtcs(guideSetExampleFullAnnotation, ntcs) + allAnnotationNames <- c("primary", "alignments", "geneAnnotation", + "tssAnnotation", "enzymeAnnotation", "snps") + + expect_error(res1 <- GuideSet2DataFrames(gs, + useSpacerCoordinates=TRUE, + primaryOnly=FALSE), + regexp=NA) + expect_error(res2 <- GuideSet2DataFrames(gs, + useSpacerCoordinates=TRUE, + primaryOnly=TRUE), + regexp=NA) + expect_error(res3 <- GuideSet2DataFrames(gs, + useSpacerCoordinates=FALSE, + primaryOnly=TRUE), + regexp=NA) + expect_error(res4 <- GuideSet2DataFrames(gs, + useSpacerCoordinates=FALSE, + primaryOnly=FALSE), + regexp=NA) + expect_false(identical(res1, res2)) + expect_false(identical(res1, res3)) + expect_false(identical(res1, res4)) + expect_false(identical(res2, res3)) + expect_false(identical(res2, res4)) + expect_false(identical(res3, res4)) + 
expect_setequal(names(res1), allAnnotationNames) + expect_equal(names(res2), "primary") + expect_equal(names(res3), "primary") + expect_setequal(names(res4), allAnnotationNames) + expect_type(res1, "list") + expect_type(res2, "list") + expect_type(res3, "list") + expect_type(res4, "list") + expect_equal(nrow(res1[["primary"]]), length(gs)) + expect_equal(nrow(res2[["primary"]]), length(gs)) + expect_equal(nrow(res3[["primary"]]), length(gs)) + expect_equal(nrow(res4[["primary"]]), length(gs)) + + ## add tests for txTable, exonTable, editedAlleles +})