Skip to content

Commit 2e5d9c5

Browse files
committed
simulate test data for keep_intervals
1 parent 7755a02 commit 2e5d9c5

File tree

10 files changed

+524
-137
lines changed

10 files changed

+524
-137
lines changed

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ serde_json = {version = "1.0.114", optional = true}
2727
bincode = {version = "1.3.1", optional = true}
2828
tskit-derive = {version = "0.2.0", path = "tskit-derive", optional = true}
2929
delegate = "0.12.0"
30+
rand = "0.8.3"
31+
3032

3133
[dev-dependencies]
3234
anyhow = {version = "1.0.79"}

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,4 +177,5 @@ mod tests {
177177
}
178178

179179
// Testing modules
180+
pub mod test_data;
180181
mod test_fixtures;

src/table_collection.rs

Lines changed: 70 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ use crate::TskReturnValue;
3232
use crate::{EdgeId, NodeId};
3333
use ll_bindings::tsk_id_t;
3434
use ll_bindings::tsk_size_t;
35+
use streaming_iterator::StreamingIterator;
3536

3637
/// A table collection.
3738
///
@@ -1379,30 +1380,51 @@ impl TableCollection {
13791380

13801381
/// Truncate the Table Collection to specified genome intervals.
13811382
///
1382-
/// # Error
1383-
/// Any errors from the C API propagate. An [TskitError::RangeError] will
1384-
/// occur when `intervals` are not sorted.
1383+
/// # Return
1384+
/// - `Ok(None)`: when truncation leads to an empty edge table.
1385+
/// - `Ok(Some(TableCollection))`: when truncation is successfully performed
1386+
/// and results in a non-empty edge table.
1387+
/// - `Err(TskitError)`: Any errors from the C API propagate. An
1388+
/// [TskitError::RangeError] will occur when `intervals` are not sorted.
13851389
///
13861390
/// # Example
13871391
/// ```rust
1388-
/// use tskit::TreeSequence;
1389-
/// let mut tables = TreeSequence::load("./testdata/1.trees")
1392+
/// # use tskit::test_data::simulation::simulate_two_treesequences;
1393+
/// # let intervals = vec![(10.0, 20.0), (700.0, 850.0)];
1394+
/// # let seqlen = 100.0;
1395+
/// # let popsize = 100;
1396+
/// # let totle_generations = 50;
1397+
/// # let popsplit_time = 10;
1398+
/// # let seed = 123;
1399+
1400+
/// # let (full_trees, _exepected) = simulate_two_treesequences(
1401+
/// # seqlen,
1402+
/// # popsize,
1403+
/// # totle_generations,
1404+
/// # popsplit_time,
1405+
/// # &intervals,
1406+
/// # seed,
1407+
/// # )
1408+
/// # .unwrap();
1409+
/// #
1410+
/// # let tables = full_trees.dump_tables().unwrap();
1411+
///
1412+
/// let _trucated_tables = tables
1413+
/// .keep_intervals(intervals.iter().map(|a| *a), true)
13901414
/// .unwrap()
1391-
/// .dump_tables()
1392-
/// .unwrap();
1393-
/// tables
1394-
/// .keep_intervals(vec![(10.0.into(), 130.0.into())].into_iter(), true)
13951415
/// .unwrap();
13961416
/// ```
13971417
///
13981418
/// Note that no new provenance will be appended.
1399-
pub fn keep_intervals(
1400-
&mut self,
1401-
intervals: impl Iterator<Item = (Position, Position)>,
1419+
pub fn keep_intervals<P>(
1420+
self,
1421+
intervals: impl Iterator<Item = (P, P)>,
14021422
simplify: bool,
1403-
) -> Result<(), TskitError> {
1404-
use streaming_iterator::StreamingIterator;
1405-
1423+
) -> Result<Option<Self>, TskitError>
1424+
where
1425+
P: Into<Position>,
1426+
{
1427+
let mut tables = self;
14061428
// use the tables from `sys` to make metadata handling easier
14071429
let options = 0;
14081430
let mut new_edges = crate::sys::EdgeTable::new(options)?;
@@ -1411,13 +1433,14 @@ impl TableCollection {
14111433
let mut new_mutations = crate::sys::MutationTable::new(options)?;
14121434

14131435
// for old site id to new site id mapping
1414-
let mut site_map = vec![-1i32; self.sites().num_rows().as_usize()];
1436+
let mut site_map = vec![-1i32; tables.sites().num_rows().as_usize()];
14151437

14161438
// flags indicating whether each (old) site will be kept in the new site table
1417-
let mut keep_sites = vec![false; self.sites().num_rows().try_into()?];
1439+
let mut keep_sites = vec![false; tables.sites().num_rows().try_into()?];
14181440

14191441
let mut last_interval = (Position::from(0.0), Position::from(0.0));
14201442
for (s, e) in intervals {
1443+
let (s, e) = (s.into(), e.into());
14211444
// make sure intervals are sorted
14221445
if (s > e) || (s < last_interval.1) {
14231446
return Err(TskitError::RangeError(
@@ -1426,13 +1449,13 @@ impl TableCollection {
14261449
}
14271450
keep_sites
14281451
.iter_mut()
1429-
.zip(self.sites_iter())
1452+
.zip(tables.sites_iter())
14301453
.for_each(|(k, site_row)| {
14311454
*k = *k || ((site_row.position >= s) && (site_row.position < e));
14321455
});
14331456

14341457
// use stream_iter and while-let pattern for easier ? operator within a loop
1435-
let mut edge_iter = self
1458+
let mut edge_iter = tables
14361459
.edges()
14371460
.lending_iter()
14381461
.filter(|edge_row| !((edge_row.right <= s) || (edge_row.left >= e)));
@@ -1452,7 +1475,7 @@ impl TableCollection {
14521475
)?;
14531476
}
14541477

1455-
let mut migration_iter = self
1478+
let mut migration_iter = tables
14561479
.migrations()
14571480
.lending_iter()
14581481
.filter(|mrow| !!((mrow.right <= s) || (mrow.left >= e)));
@@ -1471,7 +1494,7 @@ impl TableCollection {
14711494
}
14721495

14731496
let mut running_site_id = 0;
1474-
let mut site_iter = self.sites().lending_iter();
1497+
let mut site_iter = tables.sites().lending_iter();
14751498
while let Some(site_row) = site_iter.next() {
14761499
let old_id = site_row.id.to_usize().unwrap();
14771500
if keep_sites[old_id] {
@@ -1486,19 +1509,22 @@ impl TableCollection {
14861509
}
14871510

14881511
// build mutation_map
1489-
let mutation_map = {
1490-
let mut v = Vec::with_capacity(keep_sites.len());
1512+
let mutation_map: Vec<_> = {
14911513
let mut n = 0;
1492-
self.mutations().site_slice().iter().for_each(|site| {
1493-
if keep_sites[site.as_usize()] {
1494-
n += 1
1495-
};
1496-
v.push(n - 1);
1497-
});
1498-
v
1514+
tables
1515+
.mutations()
1516+
.site_slice()
1517+
.iter()
1518+
.map(|site| {
1519+
if keep_sites[site.as_usize()] {
1520+
n += 1
1521+
};
1522+
n - 1
1523+
})
1524+
.collect()
14991525
};
15001526

1501-
let mut mutations_iter = self.mutations().lending_iter();
1527+
let mut mutations_iter = tables.mutations().lending_iter();
15021528
while let Some(mutation_row) = mutations_iter.next() {
15031529
let old_id = mutation_row.site.to_usize().unwrap();
15041530
if keep_sites[old_id] {
@@ -1528,20 +1554,25 @@ impl TableCollection {
15281554
let new_sites = SiteTable::new_from_table(new_sites.as_mut())?;
15291555

15301556
// replace old tables with new tables
1531-
self.set_edges(&new_edges).map(|_| ())?;
1532-
self.set_migrations(&new_migrations).map(|_| ())?;
1533-
self.set_mutations(&new_mutations).map(|_| ())?;
1534-
self.set_sites(&new_sites)?;
1557+
tables.set_edges(&new_edges).map(|_| ())?;
1558+
tables.set_migrations(&new_migrations).map(|_| ())?;
1559+
tables.set_mutations(&new_mutations).map(|_| ())?;
1560+
tables.set_sites(&new_sites)?;
15351561

15361562
// sort tables
1537-
self.full_sort(TableSortOptions::default())?;
1563+
tables.full_sort(TableSortOptions::default())?;
15381564

15391565
// simplify tables
15401566
if simplify {
1541-
let samples = self.samples_as_vector();
1542-
self.simplify(samples.as_slice(), SimplificationOptions::default(), false)?;
1567+
let samples = tables.samples_as_vector();
1568+
tables.simplify(samples.as_slice(), SimplificationOptions::default(), false)?;
15431569
}
15441570

1545-
Ok(())
1571+
// return None when edge table is empty
1572+
if tables.edges().num_rows() == 0 {
1573+
Ok(None)
1574+
} else {
1575+
Ok(Some(tables))
1576+
}
15461577
}
15471578
}

0 commit comments

Comments
 (0)