Skip to content

Commit f6a2a8f

Browse files
committed
See Issue #14, Issue #18, Issue #19
1 parent ad1f46b commit f6a2a8f

8 files changed

+218
-75
lines changed

R/CloseBySingleRegion.R

+8-4
Original file line numberDiff line numberDiff line change
@@ -53,15 +53,19 @@ CloseBySingleRegion <- function(
5353
genome <- match.arg(genome)
5454
arrayType <- match.arg(arrayType)
5555
CpGsOrdered_df <- OrderCpGsByLocation(
56-
CpGs_char, genome, arrayType, manifest_gr, output = "dataframe"
56+
CpGs_char = CpGs_char,
57+
genome = genome,
58+
arrayType = arrayType,
59+
manifest_gr = manifest_gr,
60+
output = "dataframe"
5761
)
5862

59-
### Find close by clusters ###
63+
# Find close by clusters
6064
chr <- CpGsOrdered_df$chr
6165
pos <- CpGsOrdered_df$pos
62-
CpGsOrdered_df$cluster <- clusterMaker(chr, pos, maxGap = maxGap)
66+
CpGsOrdered_df$cluster <- clusterMaker(chr = chr, pos = pos, maxGap = maxGap)
6367

64-
### Create list of vectors of CpGs in each cluster ###
68+
# Create list of vectors of CpGs in each cluster
6569
CpGsRegion_ls <- split(CpGsOrdered_df$cpg, CpGsOrdered_df$cluster)
6670

6771
### Filter for clusters with number of CpGs >= minCpGs ###

R/CoMethSingleRegion.R

+51-42
Original file line numberDiff line numberDiff line change
@@ -77,57 +77,66 @@ CoMethSingleRegion <- function(CpGs_char,
7777
genome <- match.arg(genome)
7878
method <- match.arg(method)
7979

80+
8081
### Order CpGs by genomic location ###
8182
CpGsOrdered_df <- OrderCpGsByLocation(
82-
CpGs_char, genome, arrayType, manifest_gr, output = "dataframe"
83+
CpGs_char = CpGs_char,
84+
genome = genome,
85+
arrayType = arrayType,
86+
manifest_gr = manifest_gr,
87+
output = "dataframe"
8388
)
8489

90+
8591
### Extract beta matrix for the input CpGs ###
8692
# take common cpgs in beta matrix and the region first
8793
commonCpGs_char <- intersect(CpGsOrdered_df$cpg, row.names(dnam))
8894

89-
if (length(commonCpGs_char) >= minCpGs){
90-
91-
betaCluster_mat <- dnam[commonCpGs_char, ]
92-
93-
### Transpose beta matrix ###
94-
betaClusterTransp_mat <- t(betaCluster_mat)
95-
96-
### Mark comethylated CpGs ###
97-
keepCpGs_df <- MarkComethylatedCpGs(
98-
betaCluster_mat = betaClusterTransp_mat,
99-
method = method,
100-
betaToM = betaToM,
101-
rDropThresh_num
102-
)
103-
104-
### Find contiguous comethylated regions ###
105-
keepContiguousCpGs_df <- FindComethylatedRegions(
106-
CpGs_df = keepCpGs_df
107-
)
108-
109-
### Split CpG dataframe by Subregion ###
110-
keepContiguousCpGs_ls <- SplitCpGDFbyRegion(
111-
keepContiguousCpGs_df, genome, arrayType, manifest_gr, returnAllCpGs
112-
)
113-
114-
### Create Output Data Frame ###
115-
coMethCpGs_df <- CreateOutputDF(
116-
keepCpGs_df, keepContiguousCpGs_df, CpGsOrdered_df, returnAllCpGs
117-
)
118-
119-
### Create output list of data frame and CpGs by subregion ###
120-
coMethCpGs_ls <- list(
121-
contiguousRegions = coMethCpGs_df,
122-
CpGsSubregions = keepContiguousCpGs_ls
123-
)
124-
125-
coMethCpGs_ls
126-
127-
} else {
95+
if (length(commonCpGs_char) < minCpGs){
12896
return(NULL)
97+
} else {
98+
99+
### Mark comethylated CpGs ###
100+
betaClusterT_mat <- t( dnam[commonCpGs_char, ] )
101+
102+
keepCpGs_df <- MarkComethylatedCpGs(
103+
betaCluster_mat = betaClusterT_mat,
104+
method = method,
105+
betaToM = betaToM,
106+
rDropThresh_num
107+
)
108+
109+
# Find contiguous comethylated regions
110+
keepContiguousCpGs_df <- FindComethylatedRegions(
111+
CpGs_df = keepCpGs_df
112+
)
113+
114+
115+
### Split CpG dataframe by Subregion ###
116+
keepContiguousCpGs_ls <- SplitCpGDFbyRegion(
117+
CpGsSubregions_df = keepContiguousCpGs_df,
118+
genome = genome,
119+
arrayType = arrayType,
120+
manifest_gr = manifest_gr,
121+
returnAllCpGs = returnAllCpGs
122+
)
123+
124+
125+
### Create Output ###
126+
coMethCpGs_df <- CreateOutputDF(
127+
keepCpGs_df = keepCpGs_df,
128+
keepContiguousCpGs_df = keepContiguousCpGs_df,
129+
CpGsOrdered_df = CpGsOrdered_df,
130+
returnAllCpGs = returnAllCpGs
131+
)
132+
133+
coMethCpGs_ls <- list(
134+
contiguousRegions = coMethCpGs_df,
135+
CpGsSubregions = keepContiguousCpGs_ls
136+
)
137+
138+
coMethCpGs_ls
139+
129140
}
130141

131-
132-
133142
}

R/CpGsInfoAllRegions.R

+29-8
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#' @param contPheno_char character string of the continuous phenotype to be
1919
#' tested against methylation values
2020
#' @param covariates_char character vector of covariate variables names
21+
#' @param genome Human reference genome build: "hg19" (default) or "hg38"
2122
#' @param arrayType Type of array, can be "450k" or "EPIC"
2223
#'
2324
#' @return a data frame with locations of the genomic region (Region), CpG ID
@@ -47,27 +48,47 @@
4748
CpGsInfoAllRegions <- function(AllRegionNames_char,
4849
allRegions_gr = NULL,
4950
betas_df, pheno_df,
50-
contPheno_char, covariates_char,
51+
contPheno_char,
52+
covariates_char,
53+
genome = c("hg19", "hg38"),
5154
arrayType = c("450k", "EPIC")){
52-
55+
56+
### Inputs ###
57+
58+
# Available manifest files are:
59+
# "EPIC.hg19.manifest" "EPIC.hg38.manifest"
60+
# "HM450.hg19.manifest" "HM450.hg38.manifest"
61+
genome <- match.arg(genome)
5362
arrayType <- match.arg(arrayType)
63+
manifest <- paste(
64+
switch(arrayType, "450k" = "HM450", "EPIC" = "EPIC"),
65+
genome, "manifest",
66+
sep = "."
67+
)
68+
CpGlocations.gr <- ImportSesameData(manifest)
69+
70+
# Regions
5471
if (!is.null(allRegions_gr)) {
5572
AllRegionNames_char <- as.character(allRegions_gr)
5673
}
5774

75+
76+
### Apply ###
5877
resultsAllRegions_ls <- lapply(
5978
AllRegionNames_char,
6079
FUN = CpGsInfoOneRegion,
6180
region_gr = NULL,
62-
betas_df,
63-
pheno_df,
64-
contPheno_char,
65-
covariates_char,
66-
arrayType
81+
betas_df = betas_df,
82+
pheno_df = pheno_df,
83+
contPheno_char = contPheno_char,
84+
covariates_char = covariates_char,
85+
arrayType = arrayType,
86+
manifest_gr = CpGlocations.gr
6787
)
6888

89+
90+
### Return ###
6991
resultsAllRegions_df <- do.call(rbind, resultsAllRegions_ls)
70-
7192
unique(resultsAllRegions_df)
7293

7394
}

R/CpGsInfoOneRegion.R

+49-18
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,12 @@
1414
#' @param contPheno_char character string of the continuous phenotype to be
1515
#' tested against methylation values
1616
#' @param covariates_char character vector of covariate variables names
17+
#' @param genome Human reference genome build: "hg19" (default) or "hg38"
1718
#' @param arrayType Type of array, can be "450k" or "EPIC"
19+
#' @param manifest_gr A GRanges object with the genome manifest (as returned by
20+
#' \code{\link[ExperimentHub]{ExperimentHub}} or by
21+
#' \code{\link{ImportSesameData}}). This function by default ignores this
22+
#' argument in favour of the \code{genome} and \code{arrayType} arguments.
1823
#'
1924
#' @return a data frame with location of the genomic region (Region), CpG ID
2025
#' (cpg), chromosome (chr), position (pos), results for testing association of
@@ -55,10 +60,15 @@ CpGsInfoOneRegion <- function(
5560
pheno_df,
5661
contPheno_char,
5762
covariates_char = NULL,
58-
arrayType = c("450k","EPIC")
63+
genome = c("hg19", "hg38"),
64+
arrayType = c("450k", "EPIC"),
65+
manifest_gr = NULL
5966
){
6067
# browser()
6168

69+
70+
### Inputs ###
71+
genome <- match.arg(genome)
6272
arrayType <- match.arg(arrayType)
6373

6474
switch(
@@ -74,29 +84,39 @@ CpGsInfoOneRegion <- function(
7484

7585
### Extract individual CpGs in the region ###
7686
if (is.null(region_gr)) {
77-
CpGsToTest_char <- GetCpGsInRegion(regionName_char, arrayType = arrayType)
87+
88+
CpGsToTest_char <- GetCpGsInRegion(
89+
regionName_char = regionName_char,
90+
genome = genome,
91+
arrayType = arrayType,
92+
manifest_gr = manifest_gr
93+
)
94+
7895
} else {
96+
7997
CpGsToTest_char <- GetCpGsInRegion(
80-
region_gr = region_gr, arrayType = arrayType
98+
region_gr = region_gr,
99+
genome = genome,
100+
arrayType = arrayType,
101+
manifest_gr = manifest_gr
81102
)
82103
regionName_char <- as.character(region_gr)
104+
83105
}
84106

85-
### Transpose dnam from wide to long ###
86-
CpGsBeta_df <- betas_df[
87-
which(rownames(betas_df) %in% CpGsToTest_char),
88-
]
89-
90-
### Calculate M values ###
107+
108+
### Wrangle and Tidy Data ###
109+
CpGsBeta_df <- betas_df[rownames(betas_df) %in% CpGsToTest_char, ]
91110
CpGsMvalue_df <- log2(CpGsBeta_df / (1 - CpGsBeta_df))
92111

93-
### Match samples to test in pheno and beta data frames ###
112+
# Match samples to test in pheno and beta data frames
94113
rownames(pheno_df) <- pheno_df$Sample
95114
samplesToTest <- intersect(colnames(CpGsMvalue_df), rownames(pheno_df))
96115
phenoTest_df <- pheno_df[samplesToTest, ]
97-
CpGsMvalueTest_df <- CpGsMvalue_df[ ,samplesToTest]
116+
CpGsMvalueTest_df <- CpGsMvalue_df[ , samplesToTest]
98117

99-
### Run linear model for each CpG ###
118+
119+
### Function to run linear model for each CpG ###
100120
if (is.null(covariates_char)){
101121

102122
lmF <- function(Mvalue) {
@@ -116,13 +136,23 @@ CpGsInfoOneRegion <- function(
116136

117137
}
118138

119-
resultAllCpGs <- data.frame(t(apply(CpGsMvalueTest_df, 1, lmF)))
120-
121-
### Return results ###
139+
140+
### Run the Models ###
141+
resultAllCpGs <- data.frame(
142+
t( apply(CpGsMvalueTest_df, 1, lmF) )
143+
)
122144
colnames(resultAllCpGs) <- c("slopeEstimate", "slopePval")
145+
146+
147+
### Wrangle results ###
123148
CpGsLocation <- OrderCpGsByLocation(
124-
CpGs_char = CpGsToTest_char, arrayType = arrayType, output = "dataframe"
149+
CpGs_char = CpGsToTest_char,
150+
genome = genome,
151+
arrayType = arrayType,
152+
manifest_gr = manifest_gr,
153+
output = "dataframe"
125154
)
155+
126156
outDF <- merge(
127157
CpGsLocation, resultAllCpGs,
128158
by.x = "cpg", by.y = "row.names", sort = FALSE
@@ -136,14 +166,15 @@ CpGsInfoOneRegion <- function(
136166

137167
outDF$slopeEstimate <- round(outDF$slopeEstimate, 4)
138168

139-
### Add annotations
169+
170+
### Add Annotations and Return ###
140171
CpGsAnno_df <- annotation_df[
141172
CpGsToTest_char,
142173
c("UCSC_RefGene_Name", "UCSC_RefGene_Accession", "UCSC_RefGene_Group")
143174
]
144175

145176
outAnno_df <- merge(
146-
outDF, CpGsAnno_df,
177+
outDF, CpGsAnno_df,
147178
by.x = "cpg", by.y = "row.names", sort = FALSE
148179
)
149180

R/GetCpGsInRegion.R

+2-2
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ GetCpGsInRegion <- function(
4040
ignoreStrand = TRUE
4141
){
4242

43-
arrayType <- match.arg(arrayType)
4443
genome <- match.arg(genome)
44+
arrayType <- match.arg(arrayType)
4545

4646

4747
### The GRanges Object ###
@@ -65,7 +65,7 @@ GetCpGsInRegion <- function(
6565
} else {
6666
gr <- region_gr
6767
}
68-
CpGlocations.gr <- subsetByOverlaps(CpGlocations.gr, gr)
68+
CpGlocations.gr <- subsetByOverlaps(x = CpGlocations.gr, ranges = gr)
6969

7070
OrderCpGsByLocation(
7171
names(CpGlocations.gr),

man/CpGsInfoAllRegions.Rd

+3
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/CpGsInfoOneRegion.Rd

+10-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)