Skip to content

Commit

Permalink
potential solution to #69
Browse files — browse the repository at this point in the history
  • Loading branch information
donaldcampbelljr committed Jan 21, 2025
1 parent df443c2 commit e493a39
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 13 deletions.
7 changes: 7 additions & 0 deletions gtars/src/uniwig/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,13 @@ pub fn create_uniwig_cli() -> Command {
.help("Count via score (narrowPeak only!)")
.action(ArgAction::SetTrue),
)
.arg(
Arg::new("skip-sort")
.long("skip-sort")
.short('k')
.help("Skip sorting starts and ends during reading of bed files.")
.action(ArgAction::SetTrue),
)

Check warning on line 103 in gtars/src/uniwig/cli.rs

View check run for this annotation

Codecov / codecov/patch

gtars/src/uniwig/cli.rs#L97-L103

Added lines #L97 - L103 were not covered by tests
.arg(
Arg::new("no-bamshift")
.long("no-bamshift")
Expand Down
7 changes: 6 additions & 1 deletion gtars/src/uniwig/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,9 @@ pub fn run_uniwig(matches: &ArgMatches) {
.expect("requires int value");

let score = matches.get_one::<bool>("score").unwrap_or_else(|| &false);

let skip = matches.get_one::<bool>("skip-sort").unwrap_or_else(|| &false);

Check warning on line 162 in gtars/src/uniwig/mod.rs

View check run for this annotation

Codecov / codecov/patch

gtars/src/uniwig/mod.rs#L160-L162

Added lines #L160 - L162 were not covered by tests
let bam_shift = matches
.get_one::<bool>("no-bamshift")
.unwrap_or_else(|| &true);
Expand Down Expand Up @@ -186,6 +189,7 @@ pub fn run_uniwig(matches: &ArgMatches) {
*debug,
*bam_shift,
*bam_scale,
*skip,

Check warning on line 192 in gtars/src/uniwig/mod.rs

View check run for this annotation

Codecov / codecov/patch

gtars/src/uniwig/mod.rs#L192

Added line #L192 was not covered by tests
)
.expect("Uniwig failed.");
}
Expand Down Expand Up @@ -215,6 +219,7 @@ pub fn uniwig_main(
debug: bool,
bam_shift: bool,
bam_scale: f32,
skip_sort: bool,
) -> Result<(), Box<dyn Error>> {
// Must create a Rayon thread pool in which to run our iterators
let pool = rayon::ThreadPoolBuilder::new()
Expand Down Expand Up @@ -254,7 +259,7 @@ pub fn uniwig_main(
Ok(FileType::BED) | Ok(FileType::NARROWPEAK) => {
// Pare down chromosomes if necessary
let mut final_chromosomes =
get_final_chromosomes(&input_filetype, filepath, &chrom_sizes, score);
get_final_chromosomes(&input_filetype, filepath, &chrom_sizes, score, skip_sort);

// Some housekeeping depending on output type
let og_output_type = output_type; // need this later for conversion
Expand Down
16 changes: 10 additions & 6 deletions gtars/src/uniwig/reading.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use std::path::Path;

/// Reads combined bed file from a given path.
/// Returns Vec of Chromosome struct
pub fn read_bed_vec(combinedbedpath: &str) -> Vec<Chromosome> {
pub fn read_bed_vec(combinedbedpath: &str, skip_sort: bool,) -> Vec<Chromosome> {
let default_score = 1; // this will later be used for the count, which, by default, was originally = 1
let path = Path::new(combinedbedpath);

Expand Down Expand Up @@ -59,8 +59,10 @@ pub fn read_bed_vec(combinedbedpath: &str) -> Vec<Chromosome> {
if String::from(parsed_chr.trim()) != chrom {
// If the parsed chrom is not the same as the current, sort, and then push to vector
// then reset chromosome struct using the newest parsed_chr
chromosome.starts.sort_unstable();
chromosome.ends.sort_unstable();
if !skip_sort {
chromosome.starts.sort_unstable();
chromosome.ends.sort_unstable();
}

chromosome_vec.push(chromosome.clone());

Expand All @@ -75,9 +77,11 @@ pub fn read_bed_vec(combinedbedpath: &str) -> Vec<Chromosome> {
chromosome.ends.push((parsed_end, default_score));
}

// Is this final sort and push actually necessary?
chromosome.starts.sort_unstable();
chromosome.ends.sort_unstable();
if !skip_sort {
// Is this final sort and push actually necessary?
chromosome.starts.sort_unstable();
chromosome.ends.sort_unstable();
}
chromosome_vec.push(chromosome.clone());

println!("Reading Bed file complete.");
Expand Down
7 changes: 4 additions & 3 deletions gtars/src/uniwig/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,18 +52,19 @@ pub fn get_final_chromosomes(
filepath: &str,
chrom_sizes: &std::collections::HashMap<String, u32>,
score: bool,
skip_sort: bool,
) -> Vec<Chromosome> {
let chromosomes: Vec<Chromosome> = match ft {
Ok(FileType::BED) => read_bed_vec(filepath),
Ok(FileType::BED) => read_bed_vec(filepath, skip_sort),
Ok(FileType::NARROWPEAK) => {
if score {
println!("FileType is NarrowPeak and Score = True...Counting based on Score");
read_narrow_peak_vec(filepath) // if score flag enabled, this will attempt to read narrowpeak scores
} else {
read_bed_vec(filepath)
read_bed_vec(filepath, skip_sort)

Check warning on line 64 in gtars/src/uniwig/utils.rs

View check run for this annotation

Codecov / codecov/patch

gtars/src/uniwig/utils.rs#L64

Added line #L64 was not covered by tests
}
}
_ => read_bed_vec(filepath),
_ => read_bed_vec(filepath, skip_sort),

Check warning on line 67 in gtars/src/uniwig/utils.rs

View check run for this annotation

Codecov / codecov/patch

gtars/src/uniwig/utils.rs#L67

Added line #L67 was not covered by tests
};

let num_chromosomes = chromosomes.len();
Expand Down
15 changes: 12 additions & 3 deletions gtars/tests/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -443,10 +443,10 @@ mod tests {

#[rstest]
fn test_read_bed_vec(path_to_bed_file: &str, path_to_bed_file_gzipped: &str) {
let result1 = read_bed_vec(path_to_bed_file);
let result1 = read_bed_vec(path_to_bed_file, false);
assert_eq!(result1.len(), 20);

let result2 = read_bed_vec(path_to_bed_file_gzipped);
let result2 = read_bed_vec(path_to_bed_file_gzipped, false);
assert_eq!(result2.len(), 20);
}

Expand Down Expand Up @@ -512,7 +512,7 @@ mod tests {

#[rstest]
fn test_read_bed_vec_length(path_to_sorted_small_bed_file: &str) {
let chromosomes: Vec<Chromosome> = read_bed_vec(path_to_sorted_small_bed_file);
let chromosomes: Vec<Chromosome> = read_bed_vec(path_to_sorted_small_bed_file, false);
let num_chromosomes = chromosomes.len();

assert_eq!(num_chromosomes, 5);
Expand Down Expand Up @@ -567,6 +567,7 @@ mod tests {
false,
true,
1.0,
false
)
.expect("Uniwig main failed!");

Expand Down Expand Up @@ -613,6 +614,7 @@ mod tests {
false,
true,
1.0,
false
)
.expect("Uniwig main failed!");

Expand Down Expand Up @@ -660,6 +662,7 @@ mod tests {
false,
true,
1.0,
false
)
.expect("Uniwig main failed!");

Expand Down Expand Up @@ -707,6 +710,7 @@ mod tests {
false,
true,
1.0,
false
)
.expect("Uniwig main failed!");
Ok(())
Expand Down Expand Up @@ -773,6 +777,7 @@ mod tests {
false,
true,
1.0,
false
);

assert!(result.is_ok());
Expand Down Expand Up @@ -841,6 +846,7 @@ mod tests {
false,
true,
1.0,
false
);

assert!(result.is_ok());
Expand Down Expand Up @@ -955,6 +961,7 @@ mod tests {
false,
true,
1.0,
false
);

assert!(result.is_ok());
Expand Down Expand Up @@ -1064,6 +1071,7 @@ mod tests {
false,
true,
1.0,
false
)
.expect("Uniwig main failed!");

Expand Down Expand Up @@ -1110,6 +1118,7 @@ mod tests {
false,
true,
1.0,
false
)
.expect("Uniwig main failed!");

Expand Down

0 comments on commit e493a39

Please sign in to comment.