forked from sjackman/stat540-project
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcoord_to_gene.R
30 lines (27 loc) · 1.02 KB
/
coord_to_gene.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# Convert a set of coordinates to a set of gene names
library(biomaRt)
library(GenomicRanges)
# Table of human genes from Ensembl (symbol, Ensembl ID, chromosome, start,
# end), keeping only the rows that have a non-empty HGNC symbol.
genes <- local({
  ensembl <- useMart(biomart = 'ensembl', dataset = 'hsapiens_gene_ensembl')
  annotation <- getBM(
    attributes = c(
      'hgnc_symbol',
      'ensembl_gene_id',
      'chromosome_name',
      'start_position',
      'end_position'),
    mart = ensembl)
  subset(annotation, subset = hgnc_symbol != '')
})
# Genomic ranges of the genes in `genes`, in the same row order, with the
# HGNC symbol stored in the `names` metadata column.
genes.gr <- with(genes, GRanges(
  seqnames = Rle(chromosome_name),
  ranges = IRanges(start = start_position, end = end_position),
  names = hgnc_symbol))
coordToGene <- function(x) {
  # Convert the coordinates of CpG islands to gene names.
  #
  # x: vector (character or factor) of coordinates formatted as
  #    'chr:start-end', e.g. 'chr1:100-200'. A leading 'chr' is removed
  #    from the chromosome name before matching.
  # Returns the unique hgnc_symbol values of the rows of the file-level
  # `genes` table whose ranges in `genes.gr` overlap at least one coordinate.
  # Stops with an error if any coordinate does not split into three fields.

  # Duplicated coordinates cannot change the (unique) result, so drop them;
  # order of first appearance is preserved.
  coords <- unique(as.character(x))
  if (length(coords) == 0) {
    # Nothing to look up.
    # NOTE(review): assumes genes$hgnc_symbol is character, so the empty
    # result is character(0) -- confirm against the biomaRt output.
    return(character(0))
  }

  parts <- strsplit(coords, '[:-]')
  malformed <- vapply(parts, length, integer(1)) != 3
  if (any(malformed)) {
    stop("coordinates must have the form 'chr:start-end'; malformed: ",
      paste(coords[malformed], collapse = ', '),
      call. = FALSE)
  }

  # One row per coordinate: chromosome, start, end.
  fields <- matrix(unlist(parts), ncol = 3, byrow = TRUE)
  # Only strip 'chr' when it is a prefix, never from the middle of a name.
  chr <- factor(sub('^chr', '', fields[, 1]))
  cgi.gr <- GRanges(
    seqnames = Rle(chr),
    ranges = IRanges(
      start = as.numeric(fields[, 2]),
      end = as.numeric(fields[, 3])),
    names = coords)

  # genes.gr has one range per row of genes, so subject hits index genes.
  overlaps <- findOverlaps(cgi.gr, genes.gr)
  unique(genes[subjectHits(overlaps), 'hgnc_symbol'])
}