From fef9c65a3a5e5bdfe8475f73787db4ab42678a3a Mon Sep 17 00:00:00 2001 From: "Kevin R. Thornton" Date: Wed, 3 Jul 2024 11:02:33 -0700 Subject: [PATCH] refactor: separate keep_intervals from simplification --- src/table_collection.rs | 23 ++++++++++------------- src/test_fixtures.rs | 33 +++++++++++---------------------- src/trees/treeseq.rs | 31 ++++++++++++++++--------------- 3 files changed, 37 insertions(+), 50 deletions(-) diff --git a/src/table_collection.rs b/src/table_collection.rs index 621e9d6a..a8d1718d 100644 --- a/src/table_collection.rs +++ b/src/table_collection.rs @@ -1379,16 +1379,19 @@ impl TableCollection { /// Truncate the [TableCollection] to specified genome intervals. /// - /// # Return + /// # Return value /// - `Ok(None)`: when truncation leads to empty edge table. /// - `Ok(Some(TableCollection))`: when trunction is successfully performed - /// and results in non-empty edge table. + /// and results in non-empty edge table. The table collection is sorted. /// - `Error(TskitError)`: Any errors from the C API propagate. An /// [TskitError::RangeError] will occur when `intervals` are not - /// sorted. Note that as `tskit` currently does not support `simplify` - /// on [TableCollection] with a non-empty migration table, calling - /// `keep_intervals` on those [TableCollection] with `simplify` set to - /// `true` will return an error. + /// sorted. + /// + /// # Notes + /// + /// - There is no option to simplify the output value. + /// Do so manually if desired. + /// Encapsulate the procedure if necessary. /// /// # Example /// ```rust @@ -1414,7 +1417,7 @@ impl TableCollection { /// # tables.build_index().unwrap(); /// # /// let intervals = [(0.0, 10.0), (90.0, 100.0)].into_iter(); - /// tables.keep_intervals(intervals, true).unwrap().unwrap(); + /// tables.keep_intervals(intervals).unwrap().unwrap(); /// ``` /// /// Note that no new provenance will be appended. @@ -1564,12 +1567,6 @@ impl TableCollection { // sort tables tables.full_sort(TableSortOptions::default())?; - // simplify tables - if simplify { - let samples = tables.samples_as_vector(); - tables.simplify(samples.as_slice(), SimplificationOptions::default(), false)?; - } - // return None when edge table is empty if tables.edges().num_rows() == 0 { Ok(None) diff --git a/src/test_fixtures.rs b/src/test_fixtures.rs index 0cbce82c..11bbba50 100644 --- a/src/test_fixtures.rs +++ b/src/test_fixtures.rs @@ -519,7 +519,7 @@ mod keep_intervals { for intervals in intervals_lst { let add_migration_table = false; let trees = generate_simple_treesequence(add_migration_table); - let res = trees.keep_intervals(intervals.into_iter(), true); + let res = trees.keep_intervals(intervals.into_iter()); assert!(res.is_err()); } } @@ -529,27 +529,13 @@ mod keep_intervals { let intervals = [(10.0, 20.0)]; let add_migration_table = true; - let to_simplify = true; let trees = generate_simple_treesequence(add_migration_table); - let res = trees.keep_intervals(intervals.iter().copied(), to_simplify); - assert!(res.is_err()); - - let add_migration_table = true; - let to_simply = false; - let trees = generate_simple_treesequence(add_migration_table); - let res = trees.keep_intervals(intervals.iter().copied(), to_simply); - assert!(res.is_ok()); - - let add_migration_table = false; - let to_simply = true; - let trees = generate_simple_treesequence(add_migration_table); - let res = trees.keep_intervals(intervals.iter().copied(), to_simply); + let res = trees.keep_intervals(intervals.iter().copied()); assert!(res.is_ok()); let add_migration_table = false; - let to_simply = false; let trees = generate_simple_treesequence(add_migration_table); - let res = trees.keep_intervals(intervals.iter().copied(), to_simply); + let res = trees.keep_intervals(intervals.iter().copied()); assert!(res.is_ok()); } @@ -573,20 +559,23 @@ mod keep_intervals { .unwrap(); if exepected.edges().num_rows() > 0 { - let truncated = full_trees - .keep_intervals(intervals.iter().copied(), true) + let mut truncated = full_trees + .keep_intervals(intervals.iter().copied()) .expect("error") .expect("empty table"); + let samples = truncated.samples_as_vector(); + assert!(truncated.edges().num_rows() > 0); + truncated + .simplify(&samples, crate::SimplificationOptions::default(), false) + .expect("error simplifying"); // dump tables for comparision - let truncated = truncated.dump_tables().unwrap(); let expected = exepected.dump_tables().unwrap(); - let res = truncated.equals(&expected, TableEqualityOptions::all()); assert!(res); } else { let trucated = full_trees - .keep_intervals(intervals.iter().copied(), true) + .keep_intervals(intervals.iter().copied()) .unwrap(); assert!(trucated.is_none()); } diff --git a/src/trees/treeseq.rs b/src/trees/treeseq.rs index 83ac1732..84e155fb 100644 --- a/src/trees/treeseq.rs +++ b/src/trees/treeseq.rs @@ -346,15 +346,19 @@ impl TreeSequence { /// Truncate the [TreeSequence] to specified genome intervals. /// + /// # Return value /// - `Ok(None)`: when truncation leads to empty edge table. /// - `Ok(Some(TableCollection))`: when trunction is successfully performed - /// and results in non-empty edge table. + /// and results in non-empty edge table. The tables are sorted. /// - `Error(TskitError)`: Any errors from the C API propagate. An /// [TskitError::RangeError] will occur when `intervals` are not - /// sorted. Note that as `tskit` currently does not support `simplify` - /// on [TreeSequence] with a non-empty migration table, calling - /// `keep_intervals` on those [TreeSequence] with `simplify` set to `true` - /// will return an error. + /// sorted. + /// + /// # Notes + /// + /// - There is no option to simplify the output value. + /// Do so manually if desired. + /// Encapsulate the procedure if necessary. /// /// # Example /// ```rust @@ -382,25 +386,22 @@ impl TreeSequence { /// # let trees = TreeSequence::new(tables, TreeSequenceFlags::default()).unwrap(); /// # /// let intervals = [(0.0, 10.0), (90.0, 100.0)].into_iter(); - /// trees.keep_intervals(intervals, true).unwrap().unwrap(); + /// let mut tables = trees.keep_intervals(intervals).unwrap().unwrap(); + /// // Conversion back to tree sequence requires the usual steps + /// tables.simplify(&tables.samples_as_vector(), tskit::SimplificationOptions::default(), false).unwrap(); + /// tables.build_index().unwrap(); + /// let trees = tables.tree_sequence(tskit::TreeSequenceFlags::default()).unwrap(); /// ``` /// /// Note that no new provenance will be appended. pub fn keep_intervals

( self, intervals: impl Iterator, - ) -> Result, TskitError> + ) -> Result, TskitError> where P: Into, { - let tables = self.dump_tables()?; - match tables.keep_intervals(intervals) { - Ok(Some(tables)) => { - Self::new(tables, TreeSequenceFlags::default().build_indexes()).map(Some) - } - Ok(None) => Ok(None), - Err(e) => Err(e), - } + self.dump_tables()?.keep_intervals(intervals) } #[cfg(feature = "provenance")]