-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
112 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,25 @@ | ||
use std::fs::read_to_string; | ||
use std::path::Path; | ||
|
||
use anyhow::Result; | ||
use serde::{Deserialize, Serialize}; | ||
|
||
/// Tokenizer settings that can be deserialized from / serialized to a config | ||
/// file via serde (see `TokenizerConfig::new`, which parses it from TOML). | ||
/// | ||
/// NOTE(review): this listing comes from a diff view without +/- markers, so | ||
/// `universe` / `hierarchical_universes` and `universes` may be the old and new | ||
/// versions of the same setting rather than coexisting fields -- confirm | ||
/// against the checked-in file. | ||
#[derive(Deserialize, Serialize, Debug, PartialEq)] | ||
pub struct TokenizerConfig { | ||
// Required string; presumably the file name of the primary universe -- TODO confirm. | ||
pub universe: String, | ||
// Optional list of strings; may be omitted from the config. | ||
pub hierarchical_universes: Option<Vec<String>>, | ||
// Required list of strings; presumably universe file names -- TODO confirm. | ||
pub universes: Vec<String>, | ||
// Optional string; presumably a file of regions to exclude -- TODO confirm. | ||
pub exclude_ranges: Option<String>, | ||
} | ||
|
||
impl TokenizerConfig { | ||
/// Load a tokenizer config from a TOML file. | ||
/// | ||
/// Reads the whole file at `path` into a string and deserializes it with | ||
/// `toml::from_str`. | ||
/// | ||
/// # Arguments | ||
/// - path: Path to the config file (a `.toml` file). | ||
/// | ||
/// # Errors | ||
/// Returns an error if the file cannot be read, or if its contents cannot | ||
/// be deserialized into a `TokenizerConfig`. | ||
pub fn new(path: &Path) -> Result<TokenizerConfig> { | ||
let toml_str = read_to_string(path)?; | ||
let config: TokenizerConfig = toml::from_str(&toml_str)?; | ||
|
||
Ok(config) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
use std::collections::HashMap; | ||
|
||
use rust_lapper::{Lapper, Interval}; | ||
|
||
use crate::common::models::Universe; | ||
|
||
/// Tokenizer state: a public `Universe` plus a private map of `Lapper` | ||
/// interval containers keyed by string. | ||
/// | ||
/// NOTE(review): no constructor or methods are visible in this hunk, so how | ||
/// `tree` is populated and queried cannot be stated here. | ||
pub struct MetaTokenizer { | ||
// The universe this tokenizer is associated with. | ||
pub universe: Universe, | ||
// One `Lapper<u32,u32>` per string key; presumably keyed by chromosome name -- TODO confirm. | ||
tree: HashMap<String, Lapper<u32,u32>> | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
chr17 7915738 7915777 0 | ||
chr6 157381091 157381200 0 | ||
chr2 168247745 168247800 0 | ||
chr4 16270164 16270220 1 | ||
chr6 7313181 7313245 1 | ||
chr10 70576200 70576231 2 | ||
chr1 151399431 151399527 2 | ||
chr2 203871200 203871375 2 | ||
chr2 203871387 203871588 2 | ||
chr12 54220192 54220409 2 | ||
chr9 3526071 3526165 3 | ||
chr9 3526183 3526269 3 | ||
chr7 1044556 1044591 3 | ||
chr8 65841729 65841752 4 | ||
chr8 65841823 65841921 4 | ||
chr2 206713923 206713976 5 | ||
chr19 48260083 48260280 5 | ||
chr15 28095897 28095963 5 | ||
chr17 78759156 78759193 5 | ||
chr17 78759222 78759311 5 | ||
chr12 121129062 121129088 6 | ||
chr1 110202920 110203109 6 | ||
chr13 74550022 74550411 6 | ||
chr15 49155856 49155887 7 | ||
chr15 49155935 49156182 8 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,2 @@ | ||
universe = "peaks.bed.gz" | ||
exclude_ranges = "excluderanges.bed.gz" | ||
hierarchical_universes = ["chroms.bed"] | ||
universes = ["peaks.bed.gz", "chroms.bed"] | ||
exclude_ranges = "excluderanges.bed.gz" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
universes = "peaks.bed.gz" | ||
hieracrhical_universes = ["chroms.bed"] | ||
exclude_ranges = "excluderanges.bed.gz" |