Skip to content

Commit

Permalink
hpo
Browse files Browse the repository at this point in the history
  • Loading branch information
GuangchuangYu committed Aug 15, 2024
1 parent ad38261 commit 7310bcb
Show file tree
Hide file tree
Showing 9 changed files with 39 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
HPO
Binary file removed HDO.sqlite
Binary file not shown.
Binary file added HDO.sqlite.gz
Binary file not shown.
Binary file added HPO.sqlite.gz
Binary file not shown.
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
# Helper targets for rebuilding the ontology databases and their checksums.
# Both targets are commands, not files: declare them phony so that an existing
# file or directory with a matching name (for example the HPO/ input directory
# on a case-insensitive filesystem) cannot make them look up to date.
.PHONY: md5 hpo

# Regenerate md5.txt by running generate-md5.R.
md5:
	Rscript -e 'source("generate-md5.R")'

# Build the HPO database by running create-HPO.R.
hpo:
	Rscript -e 'source("create-HPO.R")'
22 changes: 22 additions & 0 deletions create-HPO.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
## Human Phenotype Ontology
## repo: https://github.com/obophenotype/human-phenotype-ontology
## release: https://github.com/obophenotype/human-phenotype-ontology/releases


# Gene annotations: keep the distinct, complete (HPO term, NCBI gene) pairs
# and expose them under the column names id / gene.
pg <- read.delim("HPO/phenotype_to_genes.txt")
hpo2gene <- na.omit(unique(pg[c("hpo_id", "ncbi_gene_id")]))
names(hpo2gene) <- c("id", "gene")

# Updated date; matches the v2024-08-13 release tag in the download URL below.
date <- 20240813

# Build HPO.sqlite from the OBO file, attaching the gene annotations.
library(obolite)
create_sqlite(
  "HPO/hp.obo",
  "HPO.sqlite",
  name = "Human Phenotype Ontology",
  date = date,
  url = "https://github.com/obophenotype/human-phenotype-ontology/releases/download/v2024-08-13/hp.obo",
  ont2gene = hpo2gene
)


1 change: 1 addition & 0 deletions create-sqlite.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ url <- "https://github.com/DiseaseOntology/HumanDiseaseOntology/blob/main/src/on

create_sqlite("Downloads/HumanDO.obo", "HDO.sqlite", name, date, url)


10 changes: 10 additions & 0 deletions generate-md5.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
# Refresh md5.txt with the checksum of every *.sqlite file in the working
# directory, keep the recorded checksums of databases that are not present
# right now, and finally gzip the *.sqlite files that were found.

# Escape the dot so that only a literal ".sqlite" suffix matches.
ff <- list.files(pattern = "\\.sqlite$")
md5 <- vapply(
  ff,
  function(f) digest::digest(f, algo = "md5", file = TRUE),
  character(1)
)

# Carry over entries for files that are not being re-hashed in this run.
# An empty md5.txt is skipped because read.delim() fails on a file without
# any lines.
if (file.exists("md5.txt") && file.size("md5.txt") > 0) {
  # Read both columns as text so a checksum is never parsed as a number.
  x <- read.delim("md5.txt", header = FALSE, colClasses = "character")
  oldmd5 <- setNames(x[[2]], x[[1]])
  md5 <- c(md5, oldmd5[!names(oldmd5) %in% names(md5)])
}

# Write names(md5), not ff: after the merge above md5 can hold more entries
# than ff, and sprintf() would recycle ff and label the preserved checksums
# with the wrong file names.
cat(sprintf("%s\t%s\n", names(md5), md5), file = "md5.txt", sep = "")

# Compress each database found in this run (called for its side effect only).
for (f in ff) {
  R.utils::gzip(f, overwrite = TRUE)
}

1 change: 1 addition & 0 deletions md5.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
HDO.sqlite 41c0d4dc996b5bb4df194fc7d3c3f5b7
HPO.sqlite 1722ff49c2eb7162c77d1d48b055df1c

0 comments on commit 7310bcb

Please sign in to comment.