Skip to content

Commit

Permalink
Add edge index to Phylo Factor objective function
Browse files Browse the repository at this point in the history
  • Loading branch information
lczech committed Feb 20, 2024
1 parent 6fe7d73 commit 0340470
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 15 deletions.
13 changes: 7 additions & 6 deletions lib/genesis/tree/mass_tree/phylo_factor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,9 +181,10 @@ std::unordered_set<size_t> phylo_factor_subtree_indices(
}

PhyloFactor phylo_factor_find_best_edge(
size_t iteration,
BalanceData const& data,
std::unordered_set<size_t> const& candidate_edges,
std::function<double( std::vector<double> const& balances, size_t edge_index )> objective
PhyloFactorObjectiveFunction objective
) {
assert( ! data.tree.empty() );

Expand Down Expand Up @@ -242,7 +243,7 @@ PhyloFactor phylo_factor_find_best_edge(
auto const balances = mass_balance( data, s_indices, p_indices );

// Calculate the objective function, and store it in the result.
auto const ov = objective( balances, ce_idx );
auto const ov = objective( iteration, ce_idx, balances );
result.all_objective_values[ ce_idx ] = ov;
if( ! std::isfinite( ov )) {
continue;
Expand Down Expand Up @@ -272,7 +273,7 @@ std::vector<PhyloFactor> phylogenetic_factorization(
) {
return phylogenetic_factorization(
data,
[&objective]( std::vector<double> const& balances, size_t ){
[&objective]( size_t, size_t, std::vector<double> const& balances ){
return objective( balances );
},
max_iterations,
Expand All @@ -282,7 +283,7 @@ std::vector<PhyloFactor> phylogenetic_factorization(

std::vector<PhyloFactor> phylogenetic_factorization(
BalanceData const& data,
std::function<double( std::vector<double> const& balances, size_t edge_index )> objective,
PhyloFactorObjectiveFunction objective,
size_t max_iterations,
std::function<void( size_t iteration, size_t max_iterations )> log_progress
) {
Expand Down Expand Up @@ -312,11 +313,11 @@ std::vector<PhyloFactor> phylogenetic_factorization(

// Log the progress, if needed.
if( log_progress ) {
log_progress( it + 1, max_iterations );
log_progress( it, max_iterations );
}

// Find and store the next (greedy) phylo factor.
result.push_back( phylo_factor_find_best_edge( data, candidate_edges, objective ));
result.push_back( phylo_factor_find_best_edge( it, data, candidate_edges, objective ));

// Remove its edge from the candidate list.
assert( candidate_edges.count( result.back().edge_index ) > 0 );
Expand Down
26 changes: 17 additions & 9 deletions lib/genesis/tree/mass_tree/phylo_factor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,13 @@ using MassTree = Tree;
// Phylogenetic Factorization
// =================================================================================================

/**
 * @brief Signature of the objective function evaluated by phylogenetic_factorization().
 *
 * The callable receives, in this order:
 *
 *  - `iteration`: the iteration counter that phylogenetic_factorization() forwards to
 *    phylo_factor_find_best_edge() for the factor currently being searched.
 *  - `edge_index`: the index of the candidate edge that is being evaluated.
 *  - `balances`: the balances that were computed for that candidate edge.
 *
 * It returns the value of the objective for that edge as a `double`.
 */
using PhyloFactorObjectiveFunction = std::function<double(
    size_t iteration,
    size_t edge_index,
    std::vector<double> const& balances
)>;

/**
* @brief A single phylogenetic factor.
*
Expand Down Expand Up @@ -118,9 +125,10 @@ std::unordered_set<size_t> phylo_factor_subtree_indices(
* to find the one that maximizes the objective function.
*/
PhyloFactor phylo_factor_find_best_edge(
size_t iteration,
BalanceData const& data,
std::unordered_set<size_t> const& candidate_edges,
std::function<double( std::vector<double> const& balances, size_t edge_index )> objective
PhyloFactorObjectiveFunction objective
);

/**
Expand All @@ -140,10 +148,10 @@ PhyloFactor phylo_factor_find_best_edge(
* reasonable value here.
*
* Lastly, a functional for logging the progress can be set, which needs to take the current and
* the maximal iteration counter (1-based) and can produce some logging for this:
* the maximal iteration counter (0-based) and can produce some logging for this:
*
* []( size_t iteration, size_t max_iterations ){
* LOG_DBG1 << "iteration " << iteration << " of " << max_iterations;
* LOG_DBG1 << "iteration " << (iteration + 1) << " of " << max_iterations;
* }
*
* More details on the method can be found in
Expand All @@ -166,17 +174,17 @@ std::vector<PhyloFactor> phylogenetic_factorization(
/**
* @brief Calculate the Phylogenetic Factorization (PhyloFactor) of a set of MassTree%s.
*
* This overload also provides the edge index of the current edge when computing the @p objective
* function. This is useful when some aspect of the computation in the objective function needs
* to be stored for later: In that case, the objective function can be provided as a lambda that
* uses the additional @p edge_index to store the results of some objective computation.
* The intended use case is for instance the computation of
* This overload also provides the iteration and edge index of the current edge when computing the
* @p objective function. This is useful when some aspect of the computation in the objective
* function needs to be stored for later: In that case, the objective function can be provided
* as a lambda that uses the additional @p edge_index to store the results of some objective
* computation. The intended use case is for instance the computation of
* @link genesis::utils::glm_fit() glm_fit()@endlink in the objective function, and subsequent
* storage of the model output, such as deviance or beta estimates, for later inspection.
*/
std::vector<PhyloFactor> phylogenetic_factorization(
BalanceData const& data,
std::function<double( std::vector<double> const& balances, size_t edge_index )> objective,
PhyloFactorObjectiveFunction objective,
size_t max_iterations = 0,
std::function<void( size_t iteration, size_t max_iterations )> log_progress = {}
);
Expand Down

0 comments on commit 0340470

Please sign in to comment.