Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dev igd 45 #60

Merged
merged 11 commits into from
Dec 20, 2024
2 changes: 1 addition & 1 deletion gtars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,6 @@ glob = "0.3.1"


[dev-dependencies]
rstest = "0.18.2"
rstest = "0.23.0"
tempfile = "3.8.1"
pretty_assertions = "1.4.0"
22 changes: 16 additions & 6 deletions gtars/src/igd/create.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,9 @@ pub struct igd_t {
pub mctg: i32, //data type: 0, 1, 2 etc; size differs
pub total: i64, // total region in each ctg
pub ctg: Vec<ctg_t>, // this is the list of contigs (of size n-ctg) // this might need to be a reference
pub total_regions: i32,
pub total_average: f32,
pub average_length: f32,
}

impl igd_t {
Expand Down Expand Up @@ -100,11 +103,11 @@ pub fn igd_get_create_matches(matches: &ArgMatches) {
.get_one::<String>("dbname")
.expect("File list path is required");

create_igd_f(output_path, filelist, db_output_name);
let _igd = create_igd_f(output_path, filelist, db_output_name);
}

/// Creates IGD database from a directory of bed files.
pub fn create_igd_f(output_path: &String, filelist: &String, db_output_name: &String) {
pub fn create_igd_f(output_path: &String, filelist: &String, db_output_name: &String) -> igd_t {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is igd_t a struct? What do you think about capitalizing it to be more "Rusty"?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At some point we can do a large refactor to tackle things like this. For example, some variables are named in a way that is not super clear. This is because they were ported directly from C code and I wanted to keep them aligned for troubleshooting purposes.

//println!("{}",db_output_name);
//Initialize IGD into Memory
let mut igd = igd_t::new();
Expand Down Expand Up @@ -373,14 +376,20 @@ pub fn create_igd_f(output_path: &String, filelist: &String, db_output_name: &St
// Sort tile data and save into single files per ctg
igd_save_db(&mut igd, output_path, db_output_name);

igd.total_regions=total_regions;
igd.total_average=total_avg_size;
igd.average_length= total_avg_size / total_regions as f32;

let save_path = format!("{}{}{}", output_path, db_output_name, ".igd");
println!("IGD saved to: {}", save_path);
println!(
"Total Intervals: {}, l_avg: {}",
total_regions,
total_avg_size / total_regions as f32
igd.total_regions,
igd.average_length
);
println!("nctg:{} nbp:{}", igd.nctg, igd.nbp);

igd // return for testing purposes
}

/// Saves the primary .igd database file by reading the temp_tiles, sorting them, and then writing the sorted tiles to disk.
Expand Down Expand Up @@ -560,7 +569,7 @@ pub fn igd_save_db(igd: &mut igd_t, output_path: &String, db_output_name: &Strin
let _ = main_db_file.write_all(&temp_buffer);
}

q.nCnts = 0;
//q.nCnts = 0;
}
}

Expand Down Expand Up @@ -631,7 +640,7 @@ pub fn igd_saveT(igd: &mut igd_t, output_file_path: &String) {
}
file.write_all(&buffer).unwrap();

current_tile.nCnts = current_tile.ncnts + 1;
current_tile.nCnts = current_tile.nCnts + current_tile.ncnts;

if current_tile.ncnts > 8 {
current_tile.mcnts = 8;
Expand Down Expand Up @@ -811,6 +820,7 @@ pub fn igd_add(
gdata.start = start;
gdata.end = end;
gdata.value = v;
//println!("Adding to igd, start {}, idx {}", start,idx);
gdata.idx = idx as i32;

igd.total += 1;
Expand Down
22 changes: 14 additions & 8 deletions gtars/src/igd/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -296,14 +296,14 @@ fn get_overlaps(
// );

if tmpi > 0 {
if n1 != *preIdx || ichr != *preChr {

// println!(
// "n1 != *preIdx || ichr!= *preChr {} vs {} {} vs {} \n",
// n1, preIdx, ichr, preChr
// );

//println!("Seek start here: {}",IGD.tIdx[ichr as usize][n1 as usize]);

//let ichr = 1;
db_reader
.seek(SeekFrom::Start(IGD.tIdx[ichr as usize][n1 as usize] as u64))
.unwrap();
Expand Down Expand Up @@ -333,8 +333,9 @@ fn get_overlaps(
let end = rdr.read_i32::<LittleEndian>().unwrap();
let value = rdr.read_i32::<LittleEndian>().unwrap();

//println!("for tmpi>0 where tmpi = {}", tmpi);
//println!("Looping through g_datat in temp files\n");
// println!("idx: {} start: {} end: {}\n", idx,start,end);
//println!("idx: {} start: {} end: {}\n", idx,start,end);

gData[i as usize] = gdata_t {
idx: idx,
Expand All @@ -352,7 +353,7 @@ fn get_overlaps(

if query_end > gData[0].start {
// sorted by start
//println!("query_end > gData[0].start: {} > {}", query_end,gData[0].start);
//println!("n1 != *preIdx || ichr != *preChr query_end > gData[0].start: {} > {}", query_end,gData[0].start);
// find the 1st rs<qe
tL = 0;
tR = tmpi1;
Expand All @@ -371,17 +372,19 @@ fn get_overlaps(
}
//--------------------------
for i in (0..=tL).rev() {
//println!("Countdownfrom TL");
// count down from tL (inclusive to tL)
//println!("iterate over i: {} ", i);
//println!("iterate over i: {} from tL {}", i, tL);
//println!("gdata[i].end {} vs query start {}",gData[i as usize].end,query_start);
if gData[i as usize].end > query_start {
//println!("ADDING TO HITS");
//println!(" > gData[i].end > query_start {} > {}", gData[i as usize].end, query_start);
hits[gData[i as usize].idx as usize] =
hits[gData[i as usize].idx as usize] + 1;
}
}
}
}


if n2 > n1 {
//println!("n2>n1 {} vs {} ", n2, n1);
Expand Down Expand Up @@ -423,8 +426,9 @@ fn get_overlaps(
let end = rdr.read_i32::<LittleEndian>().unwrap();
let value = rdr.read_i32::<LittleEndian>().unwrap();


//println!("Looping through g_datat in temp files\n");
//println!("idx: {} start: {} end: {}\n", idx,start,end);
// println!("idx: {} start: {} end: {}\n", idx,start,end);

gData.push(gdata_t {
idx: idx,
Expand All @@ -439,6 +443,7 @@ fn get_overlaps(
}

if query_end > gData[0].start {
//println!("n2>n1 query_end > gData[0].start: {} > {}", query_end,gData[0].start);
tS = 0;

while tS < tmpi && gData[tS as usize].start < bd {
Expand Down Expand Up @@ -478,6 +483,7 @@ fn get_overlaps(
}
}
}
//println!("here are the hits {:?}", hits);
return nols; //TODO this is from the original code but its not actually being used for anything. hits vec IS the main thing.
}

Expand Down Expand Up @@ -567,7 +573,7 @@ pub fn get_igd_info(
reader.read_exact(&mut buffer)?;
let nCtg = i32::from_le_bytes(buffer);

//println!("Found:\n nbp:{} gtype: {} nCtg: {}", nbp,gType,nCtg);
println!("Found:\n nbp:{} gtype: {} nCtg: {}", nbp,gType,nCtg);

igd.nbp = nbp;
igd.gType = gType;
Expand Down
15 changes: 0 additions & 15 deletions gtars/tests/data/igd_file_list/bad_bed_file.notbed

This file was deleted.

8 changes: 0 additions & 8 deletions gtars/tests/data/igd_file_list/bad_bed_file_2.notbed

This file was deleted.

4 changes: 0 additions & 4 deletions gtars/tests/data/igd_file_list/igd_bed_file_1.bed

This file was deleted.

37 changes: 0 additions & 37 deletions gtars/tests/data/igd_file_list/igd_bed_file_2.notbed

This file was deleted.

8 changes: 8 additions & 0 deletions gtars/tests/data/igd_file_list_01/igd_bed_file_1.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
chr1 1 100
chr1 200 300
chr1 32768 32868
chr1 49152 49352
chr2 1 100
chr2 200 300
chr3 32768 32868
chr3 49152 49352
8 changes: 8 additions & 0 deletions gtars/tests/data/igd_file_list_02/igd_bed_file_1.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
chr1 1 100
chr1 200 300
chr1 32768 32868
chr1 49152 49352
chr2 1 100
chr2 200 300
chr3 32768 32868
chr3 49152 49352
4 changes: 4 additions & 0 deletions gtars/tests/data/igd_file_list_02/igd_bed_file_2.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
chr2 652554 652780 SRX4150706.05_peak_5 157 . 2.14622 20.42377 15.73019 44
chr2 653837 654214 SRX4150706.05_peak_6 757 . 3.67362 82.37296 75.78497 191
chr11 951681 952010 SRX4150706.05_peak_247 205 . 11.82913 25.65609 20.56433 139
chr11 1248894 1249428 SRX4150706.05_peak_248 252 . 11.83432 30.63056 25.20567 179
8 changes: 8 additions & 0 deletions gtars/tests/data/igd_query_files/query1.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
chr1 1 100
chr1 200 300
chr1 32768 32868
chr1 49152 49352
chr2 1 100
chr2 200 300
chr3 32768 32868
chr3 49152 49352
2 changes: 2 additions & 0 deletions gtars/tests/data/igd_query_files/query2.bed
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
chr3 49152 49352
chr2 653837 654214 SRX4150706.05_peak_6 757 . 3.67362 82.37296 75.78497 191
Loading