Skip to content

Commit

Permalink
refactor: separate keep_intervals from simplify (#647)
Browse files Browse the repository at this point in the history
  • Loading branch information
molpopgen committed Jul 4, 2024
1 parent cb37378 commit 90603cf
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 52 deletions.
24 changes: 10 additions & 14 deletions src/table_collection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1379,16 +1379,19 @@ impl TableCollection {

/// Truncate the [TableCollection] to specified genome intervals.
///
/// # Return
/// # Return value
/// - `Ok(None)`: when truncation leads to empty edge table.
/// - `Ok(Some(TableCollection))`: when trunction is successfully performed
/// and results in non-empty edge table.
/// and results in non-empty edge table. The table collection is sorted.
/// - `Error(TskitError)`: Any errors from the C API propagate. An
/// [TskitError::RangeError] will occur when `intervals` are not
/// sorted. Note that as `tskit` currently does not support `simplify`
/// on [TableCollection] with a non-empty migration table, calling
/// `keep_intervals` on those [TableCollection] with `simplify` set to
/// `true` will return an error.
/// sorted.
///
/// # Notes
///
/// - There is no option to simplify the output value.
/// Do so manually if desired.
/// Encapsulate the procedure if necessary.
///
/// # Example
/// ```rust
Expand All @@ -1414,14 +1417,13 @@ impl TableCollection {
/// # tables.build_index().unwrap();
/// #
/// let intervals = [(0.0, 10.0), (90.0, 100.0)].into_iter();
/// tables.keep_intervals(intervals, true).unwrap().unwrap();
/// tables.keep_intervals(intervals).unwrap().unwrap();
/// ```
///
/// Note that no new provenance will be appended.
pub fn keep_intervals<P>(
self,
intervals: impl Iterator<Item = (P, P)>,
simplify: bool,
) -> Result<Option<Self>, TskitError>
where
P: Into<Position>,
Expand Down Expand Up @@ -1565,12 +1567,6 @@ impl TableCollection {
// sort tables
tables.full_sort(TableSortOptions::default())?;

// simplify tables
if simplify {
let samples = tables.samples_as_vector();
tables.simplify(samples.as_slice(), SimplificationOptions::default(), false)?;
}

// return None when edge table is empty
if tables.edges().num_rows() == 0 {
Ok(None)
Expand Down
33 changes: 11 additions & 22 deletions src/test_fixtures.rs
Original file line number Diff line number Diff line change
Expand Up @@ -519,7 +519,7 @@ mod keep_intervals {
for intervals in intervals_lst {
let add_migration_table = false;
let trees = generate_simple_treesequence(add_migration_table);
let res = trees.keep_intervals(intervals.into_iter(), true);
let res = trees.keep_intervals(intervals.into_iter());
assert!(res.is_err());
}
}
Expand All @@ -529,27 +529,13 @@ mod keep_intervals {
let intervals = [(10.0, 20.0)];

let add_migration_table = true;
let to_simplify = true;
let trees = generate_simple_treesequence(add_migration_table);
let res = trees.keep_intervals(intervals.iter().copied(), to_simplify);
assert!(res.is_err());

let add_migration_table = true;
let to_simply = false;
let trees = generate_simple_treesequence(add_migration_table);
let res = trees.keep_intervals(intervals.iter().copied(), to_simply);
assert!(res.is_ok());

let add_migration_table = false;
let to_simply = true;
let trees = generate_simple_treesequence(add_migration_table);
let res = trees.keep_intervals(intervals.iter().copied(), to_simply);
let res = trees.keep_intervals(intervals.iter().copied());
assert!(res.is_ok());

let add_migration_table = false;
let to_simply = false;
let trees = generate_simple_treesequence(add_migration_table);
let res = trees.keep_intervals(intervals.iter().copied(), to_simply);
let res = trees.keep_intervals(intervals.iter().copied());
assert!(res.is_ok());
}

Expand All @@ -573,20 +559,23 @@ mod keep_intervals {
.unwrap();

if exepected.edges().num_rows() > 0 {
let truncated = full_trees
.keep_intervals(intervals.iter().copied(), true)
let mut truncated = full_trees
.keep_intervals(intervals.iter().copied())
.expect("error")
.expect("empty table");
let samples = truncated.samples_as_vector();
assert!(truncated.edges().num_rows() > 0);
truncated
.simplify(&samples, crate::SimplificationOptions::default(), false)
.expect("error simplifying");

// dump tables for comparision
let truncated = truncated.dump_tables().unwrap();
let expected = exepected.dump_tables().unwrap();

let res = truncated.equals(&expected, TableEqualityOptions::all());
assert!(res);
} else {
let trucated = full_trees
.keep_intervals(intervals.iter().copied(), true)
.keep_intervals(intervals.iter().copied())
.unwrap();
assert!(trucated.is_none());
}
Expand Down
32 changes: 16 additions & 16 deletions src/trees/treeseq.rs
Original file line number Diff line number Diff line change
Expand Up @@ -346,15 +346,19 @@ impl TreeSequence {

/// Truncate the [TreeSequence] to specified genome intervals.
///
/// # Return value
/// - `Ok(None)`: when truncation leads to empty edge table.
/// - `Ok(Some(TableCollection))`: when trunction is successfully performed
/// and results in non-empty edge table.
/// and results in non-empty edge table. The tables are sorted.
/// - `Error(TskitError)`: Any errors from the C API propagate. An
/// [TskitError::RangeError] will occur when `intervals` are not
/// sorted. Note that as `tskit` currently does not support `simplify`
/// on [TreeSequence] with a non-empty migration table, calling
/// `keep_intervals` on those [TreeSequence] with `simplify` set to `true`
/// will return an error.
/// sorted.
///
/// # Notes
///
/// - There is no option to simplify the output value.
/// Do so manually if desired.
/// Encapsulate the procedure if necessary.
///
/// # Example
/// ```rust
Expand Down Expand Up @@ -382,26 +386,22 @@ impl TreeSequence {
/// # let trees = TreeSequence::new(tables, TreeSequenceFlags::default()).unwrap();
/// #
/// let intervals = [(0.0, 10.0), (90.0, 100.0)].into_iter();
/// trees.keep_intervals(intervals, true).unwrap().unwrap();
/// let mut tables = trees.keep_intervals(intervals).unwrap().unwrap();
/// // Conversion back to tree sequence requires the usual steps
/// tables.simplify(&tables.samples_as_vector(), tskit::SimplificationOptions::default(), false).unwrap();
/// tables.build_index().unwrap();
/// let trees = tables.tree_sequence(tskit::TreeSequenceFlags::default()).unwrap();
/// ```
///
/// Note that no new provenance will be appended.
pub fn keep_intervals<P>(
self,
intervals: impl Iterator<Item = (P, P)>,
simplify: bool,
) -> Result<Option<Self>, TskitError>
) -> Result<Option<TableCollection>, TskitError>
where
P: Into<Position>,
{
let tables = self.dump_tables()?;
match tables.keep_intervals(intervals, simplify) {
Ok(Some(tables)) => {
Self::new(tables, TreeSequenceFlags::default().build_indexes()).map(Some)
}
Ok(None) => Ok(None),
Err(e) => Err(e),
}
self.dump_tables()?.keep_intervals(intervals)
}

#[cfg(feature = "provenance")]
Expand Down

0 comments on commit 90603cf

Please sign in to comment.