diff --git a/.cz.toml b/.cz.toml index ee9e1967..45c84afd 100644 --- a/.cz.toml +++ b/.cz.toml @@ -1,6 +1,6 @@ [tool.commitizen] name = "cz_conventional_commits" -version = "2.3.7" +version = "2.3.8" version_provider = "poetry" version_files = [ "pyproject.toml", diff --git a/docs/source/algorithms/matching_algorithms.rst b/docs/source/algorithms/matching_algorithms.rst new file mode 100644 index 00000000..1a85b58f --- /dev/null +++ b/docs/source/algorithms/matching_algorithms.rst @@ -0,0 +1,61 @@ +Matching Algorithms for Hypergraphs +=================================== + +Introduction +------------ +This module implements various algorithms for finding matchings in hypergraphs. These algorithms are based on the methods described in the paper: + +*Distributed Algorithms for Matching in Hypergraphs* by Oussama Hanguir and Clifford Stein. + +The paper addresses the problem of finding matchings in d-uniform hypergraphs, where each hyperedge contains exactly d vertices. The matching problem is NP-complete for d ≥ 3, making it one of the classic challenges in computational theory. The algorithms described here are designed for the Massively Parallel Computation (MPC) model, which is suitable for processing large-scale hypergraphs. + +Mathematical Foundation +------------------------ +The algorithms in this module provide different trade-offs between approximation ratios, memory usage, and computation rounds: + +1. **O(d²)-approximation algorithm**: + - This algorithm partitions the hypergraph into random subgraphs and computes a matching for each subgraph. The results are combined to obtain a matching for the original hypergraph. + - Approximation ratio: O(d²) + - Rounds: 3 + - Memory: O(√nm) + +2. **d-approximation algorithm**: + - Uses sampling and post-processing to iteratively build a maximal matching. + - Approximation ratio: d + - Rounds: O(log n) + - Memory: O(dn) + +3. 
**d(d−1 + 1/d)²-approximation algorithm**: + - Utilizes the concept of HyperEdge Degree Constrained Subgraphs (HEDCS) to find an approximate matching. + - Approximation ratio: d(d−1 + 1/d)² + - Rounds: 3 + - Memory: O(√nm) for linear hypergraphs, O(n√nm) for general cases. + +These algorithms are crucial for applications that require scalable parallel processing, such as combinatorial auctions, scheduling, and multi-agent systems. + +Usage Example +------------- +Below is an example of how to use the matching algorithms module. + +.. code-block:: python + +    from hypernetx.algorithms import matching_algorithms as ma + +    # Example hypergraph data +    hypergraph = ... # Assume this is a d-uniform hypergraph + +    # Compute a matching using the O(d²)-approximation algorithm (k random partitions) +    matching = ma.greedy_matching(hypergraph, k=2) + +    # Compute a matching using the d-approximation algorithm (memory budget s) +    matching_d = ma.iterated_sampling(hypergraph, s=10) + +    # Compute a matching using the d(d−1 + 1/d)²-approximation algorithm (memory budget s) +    matching_d_squared = ma.HEDCS_matching(hypergraph, s=10) + +    print(matching, matching_d, matching_d_squared) + +References +------------- + +- Oussama Hanguir, Clifford Stein, Distributed Algorithms for Matching in Hypergraphs, https://arxiv.org/pdf/2009.09605 diff --git a/docs/source/conf.py b/docs/source/conf.py index fa5f7849..52050268 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -19,7 +19,7 @@ import os -__version__ = "2.3.7" +__version__ = "2.3.8" # If extensions (or modules to document with autodoc) are in another directory, diff --git a/docs/source/contributions.rst b/docs/source/contributions.rst new file mode 100644 index 00000000..8a50041b --- /dev/null +++ b/docs/source/contributions.rst @@ -0,0 +1,56 @@ +HyperNetX Contributor Guidelines +**************************************************** + +We welcome contributions to HyperNetX! +This document outlines the process for contributing to various aspects of the codebase. 
+We currently only provide guidelines for new modules, but we are not opposed to other forms of contribution to the HyperNetX library. + +Contributing New Modules +============================ + +We happily accept the contribution of new modules or methods to the HyperNetX library. We will do our best to keep modules functioning with new releases but may ask contributors to update code when possible. +Contributions can be slow, but the closer the pull request is to our guidelines the faster this process will be. +The required new files for any new module are listed below and an example file structure of the additional files is shown in the figure below. + +.. image:: ./images/module_addition_file_structure.png + :width: 330px + :align: right + +* Python file: Create a new Python file named **.py** under the folder **hypernetx/hypernetx/algorithms/.**. This file will contain the core functionalities of your module. All methods need to have docstrings in the new module. + +* Jupyter Notebook: Create a Jupyter notebook in the folder **hypernetx/tutorials/advanced/.** that demonstrates usage examples of your module. This notebook should be named **Advanced - .ipynb**. Please look at the current advanced module number and choose an appropriate number. + +* Test file: Write unit tests for your module in a file named **test_.py** under the tests folder located at **hypernetx/tests/algorithms/.**. These tests should ensure the correctness and functionality of your code. + +* Documentation: Write an rst file named **.rst** under the algorithms documentation folder located at **hypernetx/docs/source/algorithms/.**. This documentation should focus on the mathematics or theory behind your module with citations to relevant papers and documents. Additionally, it should provide code snippets demonstrating usage that parallel the tutorial. + +Step-by-Step Process +~~~~~~~~~~~~~~~~~~~~ + +#. 
Branch Creation: Create a new branch from the main development branch for your contribution. This allows you to isolate your changes and work independently. Use a descriptive branch name that reflects the module you're adding (e.g., add_). + +#. Code Implementation: Implement the functionalities of your module in a new **.py** file located in **hypernetx/hypernetx/algorithms/.**. Please validate that your code dependencies are not in conflict with the core HNX dependencies. Any additional dependencies should be documented thoroughly including in the notebook creation step. + +#. Documentation: Write docstrings for your code to explain the purpose and usage of functions and classes. Additionally, provide an overview description of the module in the python file. For an example of the correct docstring format please see the module **hypernetx/hypernetx/algorithms/s_centrality_measures.py**. + +#. Jupyter Notebook Creation: Create a Jupyter notebook named **Advanced - .ipynb** under the advanced tutorials folder **hypernetx/tutorials/advanced/.**. This notebook should showcase how to use your module and demonstrate its capabilities with thorough documentation. Additionally, in the notebook and any other documentation please provide clear documentation on any new dependencies outside of core HNX that are required. + +#. Testing: Write unit tests in the test_.py file to ensure your module functions as expected. This should be located in the algorithm tests folder. In the top hypernetx directory you can use the makefile and the command ``make test`` to validate everything is passing. Please see other tests and follow a similar format. + +#. Read the Docs: Include your rst file in the algorithms folder of the source docs overviewing the theory/mathematics of the new module with example code. See other rst files as examples of formatting. + +#. __init__.py Update: Update the __init__.py file in the **hypernetx/hypernetx/algorithms/** folder to import your new module. 
Please follow the style of importing used by the other modules. + +#. Commit and Push: Commit your changes with clear and concise commit messages describing each modification. Push your commits to your branch on the remote repository. + +#. Pull Request: Create a pull request from your branch to the main development branch. This will initiate a code review process. + +Additional Notes +~~~~~~~~~~~~~~~~~~~~ + +* Make sure your code adheres to PEP 8 style guidelines for Python code. +* Please add comments to your code to explain complex logic or non-obvious functionalities. +* During the review process, address any feedback or suggestions from reviewers promptly. +* Any dependencies in core HNX should not be changed for new modules. Also please list any additional dependencies thoroughly. + +By following these guidelines, you can ensure a smooth and efficient contribution process for adding new modules to HyperNetX. We appreciate your contributions to the project! \ No newline at end of file diff --git a/docs/source/images/module_addition_file_structure.png b/docs/source/images/module_addition_file_structure.png new file mode 100644 index 00000000..6308decb Binary files /dev/null and b/docs/source/images/module_addition_file_structure.png differ diff --git a/docs/source/index.rst b/docs/source/index.rst index df2b8110..b0010eb8 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -61,6 +61,7 @@ Contents Visualization Widget Algorithms: Modularity and Clustering Publications + Contributors Guide license diff --git a/hypernetx/__init__.py b/hypernetx/__init__.py index 3acc7796..d198b9f7 100644 --- a/hypernetx/__init__.py +++ b/hypernetx/__init__.py @@ -11,4 +11,4 @@ from hypernetx.utils import * from hypernetx.utils.toys import * -__version__ = "2.3.7" +__version__ = "2.3.8" diff --git a/hypernetx/algorithms/__init__.py b/hypernetx/algorithms/__init__.py index 78b30c49..3dd7129d 100644 --- a/hypernetx/algorithms/__init__.py +++ 
def approximation_matching_checking(optimal: list, approx: list) -> bool:
    """
    Checks that every edge of the optimal matching shares a vertex with the approximate matching.

    Parameters
    ----------
    optimal : list of lists
        Edges of the reference (optimal) matching, each given as an iterable of vertices.
    approx : list of lists
        Edges of the approximate matching, each given as an iterable of vertices.

    Returns
    -------
    bool
        True if each edge in ``optimal`` has at least one vertex in common with
        some edge in ``approx`` (vacuously True when ``optimal`` is empty),
        False otherwise.

    Examples
    --------
    >>> approximation_matching_checking([[1, 2, 3]], [[3, 4, 5]])
    True
    >>> approximation_matching_checking([[1, 2, 3], [7, 8, 9]], [[3, 4, 5]])
    False
    """
    # Build the vertex sets of the approximate edges once, not once per optimal edge.
    approx_sets = [set(edge) for edge in approx]
    return all(
        any(not candidate.isdisjoint(edge) for candidate in approx_sets)
        for edge in optimal
    )
def edge_tuple(hypergraph):
    """
    Builds a hashable snapshot of the hypergraph's edges.

    The result can be used as an ``lru_cache`` key, which requires hashable
    arguments.

    Parameters
    ----------
    hypergraph : hnx.Hypergraph
        A Hypergraph object.

    Returns
    -------
    tuple
        One ``(edge name, tuple of sorted vertices)`` pair per edge, ordered by edge name.
    """
    edge_view = hypergraph.edges
    hashable_edges = []
    for edge_name in sorted(edge_view):
        members = sorted(edge_view[edge_name])
        hashable_edges.append((edge_name, tuple(members)))
    return tuple(hashable_edges)
def maximal_matching(hypergraph: Hypergraph) -> list:
    """
    Finds a maximal matching in the hypergraph.

    The computation is delegated to ``cached_maximal_matching``, keyed on the
    hashable edge snapshot produced by ``edge_tuple``.

    Parameters
    ----------
    hypergraph : hnx.Hypergraph
        A Hypergraph object.

    Returns
    -------
    list
        A list of matching edges, each a list of vertices. The list and its
        edges are fresh objects, so the caller may modify them freely.
    """
    edges = edge_tuple(hypergraph)
    # cached_maximal_matching is memoized with lru_cache and hands back the very
    # same list object on every cache hit. Copy it so that a caller mutating
    # the result cannot corrupt later lookups for the same hypergraph.
    return [list(edge) for edge in cached_maximal_matching(edges)]
def iterated_sampling(
    hypergraph: Hypergraph, s: int, max_iterations: int = 100
) -> list:
    """
    Iterated Sampling for Hypergraph Matching.

    Uses iterated sampling to find a maximal matching in a d-uniform hypergraph.
    Each round samples edges of the working hypergraph ``S`` with probability
    ``s / (5 * |E(S)| * d)``, adds a maximal matching of the sample to the
    result, and restricts ``S`` to the edges whose vertices are all still
    unmatched. As soon as at most ``s`` edges remain they are matched directly
    and the result is returned.

    Parameters
    ----------
    hypergraph : hnx.Hypergraph
        A Hypergraph object.
    s : int
        The amount of memory available for the computer.
    max_iterations : int, optional
        The maximum number of iterations to perform. Defaults to 100.

    Returns
    -------
    list
        The edges of the graph for the approximate matching.

    Raises
    ------
    MemoryLimitExceededError
        If a sampled subgraph has more than ``s`` edges, or if
        ``max_iterations`` rounds pass without the remaining hypergraph
        shrinking to at most ``s`` edges.

    Examples
    -------
    >>> import numpy as np
    >>> np.random.seed(42)
    >>> random.seed(42)
    >>> hypergraph = Hypergraph({0: (1, 2, 3), 1: (2, 3, 4), 2: (3, 4, 5)})
    >>> result = iterated_sampling(hypergraph, 1)
    >>> result
    [[2, 3, 4]]

    >>> np.random.seed(42)
    >>> random.seed(42)
    >>> hypergraph = Hypergraph({0: (1, 2, 3, 4), 1: (2, 3, 4, 5), 2: (3, 4, 5, 6)})
    >>> result = iterated_sampling(hypergraph, 2)
    >>> result
    [[2, 3, 4, 5]]

    >>> np.random.seed(42)
    >>> random.seed(42)
    >>> hypergraph = Hypergraph({0: (1, 2, 3), 1: (4, 5, 6)})
    >>> result = None
    >>> try:
    ...     result = iterated_sampling(hypergraph, 0)  # Insufficient memory, expect failure
    ... except MemoryLimitExceededError:
    ...     pass
    >>> result is None
    True

    >>> np.random.seed(42)
    >>> random.seed(42)
    >>> hypergraph = Hypergraph({0: (1, 2, 3), 1: (4, 5, 6)})
    >>> result = iterated_sampling(hypergraph, 10)  # Large enough memory, expect a result
    >>> result
    [[4, 5, 6], [1, 2, 3]]

    >>> np.random.seed(42)
    >>> random.seed(42)
    >>> hypergraph = Hypergraph({0: (1, 2, 3), 1: (2, 3, 4), 2: (3, 4, 5), 3: (5, 6, 7), 4: (6, 7, 8), 5: (7, 8, 9)})
    >>> result = iterated_sampling(hypergraph, 3)
    >>> result
    [[2, 3, 4], [5, 6, 7]]

    >>> np.random.seed(42)
    >>> random.seed(42)
    >>> s = 10
    >>> edges_d4 = {'e1': [1, 2, 3, 4], 'e2': [2, 3, 4, 5], 'e3': [3, 4, 5, 6], 'e4': [4, 5, 6, 7]}
    >>> hypergraph_d4 = Hypergraph(edges_d4)
    >>> approximate_matching_d4 = iterated_sampling(hypergraph_d4, s)
    >>> approximate_matching_d4
    [[2, 3, 4, 5]]

    >>> edges_d5 = {'e1': [1, 2, 3, 4, 5], 'e2': [2, 3, 4, 5, 6], 'e3': [3, 4, 5, 6, 7]}
    >>> hypergraph_d5 = Hypergraph(edges_d5)
    >>> approximate_matching_d5 = iterated_sampling(hypergraph_d5, s)
    >>> approximate_matching_d5
    [[1, 2, 3, 4, 5]]

    >>> edges_d6 = {'e1': [1, 2, 3, 4, 5, 6], 'e2': [2, 3, 4, 5, 6, 7], 'e3': [3, 4, 5, 6, 7, 8]}
    >>> hypergraph_d6 = Hypergraph(edges_d6)
    >>> approximate_matching_d6 = iterated_sampling(hypergraph_d6, s)
    >>> approximate_matching_d6
    [[1, 2, 3, 4, 5, 6]]

    >>> edges_large = {f'e{i}': [i, i + 1, i + 2] for i in range(1, 101)}
    >>> hypergraph_large = Hypergraph(edges_large)
    >>> approximate_matching_large = iterated_sampling(hypergraph_large, s)
    >>> len(approximate_matching_large)
    26
    """

    d = max((len(edge) for edge in hypergraph.incidence_dict.values()), default=0)
    M = []
    S = hypergraph

    for _ in range(max_iterations):
        # Sampling probability for the current working hypergraph.
        p = s / (5 * len(S.edges) * d) if len(S.edges) > 0 else 0

        M_prime, _ = sampling_round(S, p, s)
        if M_prime is None:
            raise MemoryLimitExceededError(
                "Memory limit exceeded during hypergraph matching"
            )
        M.extend(M_prime)

        # Keep only the edges that avoid every vertex matched in this round.
        matched_vertices = {v for edge in M_prime for v in edge}
        unmatched_vertices = set(S.nodes) - matched_vertices
        induced_edges = [
            edge
            for edge in S.incidence_dict.values()
            if all(v in unmatched_vertices for v in edge)
        ]
        residual = hnx.Hypergraph(
            {f"e{i}": tuple(edge) for i, edge in enumerate(induced_edges)}
        )

        if len(induced_edges) <= s:
            # The remainder fits in memory: finish with a direct maximal matching.
            # Returning here (rather than breaking and re-checking the iteration
            # counter) ensures success on the last allowed round is not
            # mistaken for running out of iterations.
            M.extend(maximal_matching(residual))
            return M

        S = residual

    raise MemoryLimitExceededError(
        "Max iterations reached without finding a solution"
    )
def HEDCS_matching(hypergraph: Hypergraph, s: int) -> list:
    """
    HEDCS-Matching for Approximate Hypergraph Matching.

    This algorithm constructs Hyper-Edge Degree Constrained Subgraphs (HEDCS)
    to find an approximate maximal matching in a d-uniform hypergraph. The
    edges are split into random partitions, a HEDCS is built for every
    partition on a thread pool, and a maximal matching is computed on the
    union of the resulting subgraphs.

    Parameters
    ----------
    hypergraph : Hypergraph
        The input hypergraph.
    s : int
        The amount of memory available per machine. Must be positive.

    Returns
    -------
    list
        The edges of the graph for the approximate matching. A hypergraph
        without edges yields an empty list.

    Raises
    -------
    NonUniformHypergraphError
        If the hypergraph is not d-uniform (all edges don't have the same size).
    ValueError
        If ``s`` is not positive, or if the calculated beta and beta_minus
        values do not satisfy the beta condition.

    Examples
    -------
    >>> import numpy as np
    >>> np.random.seed(42)
    >>> random.seed(42)
    >>> hypergraph = Hypergraph({0: (1, 2)})
    >>> result = HEDCS_matching(hypergraph, 10)
    >>> result
    [[1, 2]]

    >>> np.random.seed(42)
    >>> random.seed(42)
    >>> hypergraph = Hypergraph({0: (1, 2), 1: (3, 4)})
    >>> result = HEDCS_matching(hypergraph, 10)
    >>> result
    [[1, 2], [3, 4]]

    >>> np.random.seed(42)
    >>> random.seed(42)
    >>> edges = {'e1': [1, 2, 3], 'e2': [2, 3, 4], 'e3': [1, 4, 5]}
    >>> hypergraph = Hypergraph(edges)
    >>> s = 10
    >>> approximate_matching = HEDCS_matching(hypergraph, s)
    >>> approximate_matching
    [[1, 2, 3]]

    >>> np.random.seed(42)
    >>> random.seed(42)
    >>> edges_large = {f'e{i}': [i, i + 1, i + 2] for i in range(1, 101)}
    >>> hypergraph_large = Hypergraph(edges_large)
    >>> approximate_matching_large = HEDCS_matching(hypergraph_large, s)
    >>> len(approximate_matching_large)
    34
    """

    # s is a divisor when computing the number of partitions below.
    if s <= 0:
        raise ValueError("s must be a positive integer")

    edge_sizes = {len(edge) for edge in hypergraph.incidence_dict.values()}
    if not edge_sizes:
        # No edges: nothing to match (and next() below would raise StopIteration).
        return []
    if len(edge_sizes) > 1:
        raise NonUniformHypergraphError("The hypergraph is not d-uniform.")

    d = next(iter(edge_sizes))
    n = len(hypergraph.nodes)
    m = len(hypergraph.edges)

    if n < 2:
        # log(n) is zero (or undefined) here and would be a divisor in gamma
        # and k, so skip the HEDCS machinery and match directly.
        return maximal_matching(hypergraph)

    log_n = math.log(n)

    # NOTE(review): this product is kept exactly as in the original code. It
    # may have been meant as 500 * d**3 * n**2 * log(n)**3 -- confirm against
    # the referenced paper before changing it.
    beta = 500 * d * 3 * n * 2 * (log_n * 3)
    gamma = 1 / (2 * n * log_n)
    k = math.ceil(m / (s * log_n))
    beta_minus = (1 - gamma) * beta

    if not check_beta_condition(beta, beta_minus, d):
        raise ValueError(f"beta - beta_minus must be >= {d - 1}")

    # Partition the hypergraph
    partitions = partition_hypergraph(hypergraph, k)

    # Build HEDCS for each partition in parallel
    with ThreadPoolExecutor() as executor:
        HEDCS_list = list(
            executor.map(lambda part: build_HEDCS(part, beta, beta_minus), partitions)
        )

    # Combine all the edges from the HEDCS subgraphs
    combined_edges = {}
    for H in HEDCS_list:
        combined_edges.update(H.incidence_dict)

    combined_hypergraph = hnx.Hypergraph(combined_edges)

    # Find a maximal matching in the combined hypergraph
    return maximal_matching(combined_hypergraph)
+ """ + k = 2 + hypergraph_1 = Hypergraph({"e1": {1, 2, 3}, "e2": {4, 5, 6}}) + assert greedy_matching(hypergraph_1, k) == [(1, 2, 3), (4, 5, 6)] + + hypergraph_2 = Hypergraph( + { + "e1": {1, 2, 3}, + "e2": {4, 5, 6}, + "e3": {7, 8, 9}, + "e4": {1, 4, 7}, + "e5": {2, 5, 8}, + "e6": {3, 6, 9}, + } + ) + result = greedy_matching(hypergraph_2, k) + assert len(result) == 3 + assert all(edge in [(1, 2, 3), (4, 5, 6), (7, 8, 9)] for edge in result) + + +def test_greedy_d_approximation_large_input(): + """ + Test for a large input hypergraph. + """ + k = 2 + large_hypergraph = Hypergraph( + {f"e{i}": {i, i + 1, i + 2} for i in range(1, 100, 3)} + ) + result = greedy_matching(large_hypergraph, k) + assert len(result) == len(large_hypergraph.edges) + assert all(edge in [(i, i + 1, i + 2) for i in range(1, 100, 3)] for edge in result) + + +def test_iterated_sampling_single_edge(): + """ + Test for a hypergraph with a single edge. + It checks if the result is not None and if all edges in the result have at least 2 vertices. + """ + hypergraph = Hypergraph({0: (1, 2, 3)}) + result = iterated_sampling(hypergraph, 10) + assert result is not None and all(len(edge) >= 2 for edge in result) + + +def test_iterated_sampling_two_disjoint_edges(): + """ + Test for a hypergraph with two disjoint edges. + It checks if the result is not None and if all edges in the result have at least 2 vertices. + """ + hypergraph = Hypergraph({0: (1, 2), 1: (3, 4)}) + result = iterated_sampling(hypergraph, 10) + assert result is not None and all(len(edge) >= 2 for edge in result) + + +def test_iterated_sampling_insufficient_memory(): + """ + Test for a hypergraph with insufficient memory. + It checks if the function raises a MemoryLimitExceededError when memory is set to 0. 
def test_iterated_sampling_max_iterations():
    """
    Test iterated sampling on overlapping edges with a small memory budget.

    ``iterated_sampling`` either returns a list or raises, so instead of
    tolerating ``None`` the test checks that the result really is a matching:
    it is non-empty, every returned edge is an edge of the input, and no
    vertex is used twice.
    """
    edges = {
        0: (1, 2, 3),
        1: (2, 3, 4),
        2: (3, 4, 5),
        3: (5, 6, 7),
        4: (6, 7, 8),
        5: (7, 8, 9),
    }
    hypergraph = Hypergraph(edges)
    result = iterated_sampling(hypergraph, 3)

    assert result
    input_edges = {tuple(sorted(edge)) for edge in edges.values()}
    assert all(tuple(sorted(edge)) in input_edges for edge in result)
    used_vertices = [v for edge in result for v in edge]
    assert len(used_vertices) == len(set(used_vertices))
+ """ + edges = {"e1": [1, 2, 3], "e2": [2, 3, 4], "e3": [1, 4, 5]} + hypergraph = Hypergraph(edges) + s = 10 + optimal_matching = [[1, 2, 3]] # Assuming we know the optimal matching + approximate_matching = HEDCS_matching(hypergraph, s) + assert approximation_matching_checking(optimal_matching, approximate_matching) + + +def test_HEDCS_matching_large_hypergraph(): + """ + Test with a larger hypergraph. + """ + edges_large = {f"e{i}": [i, i + 1, i + 2] for i in range(1, 101)} + hypergraph_large = Hypergraph(edges_large) + s = 10 + optimal_matching_large = [edges_large[f"e{i}"] for i in range(1, 101, 3)] + approximate_matching_large = HEDCS_matching(hypergraph_large, s) + assert approximation_matching_checking( + optimal_matching_large, approximate_matching_large + ) + + +if __name__ == "__main__": + pytest.main() diff --git a/tutorials/advanced/Advanced 7 - Matching algorithms.ipynb b/tutorials/advanced/Advanced 7 - Matching algorithms.ipynb new file mode 100644 index 00000000..9146655d --- /dev/null +++ b/tutorials/advanced/Advanced 7 - Matching algorithms.ipynb @@ -0,0 +1,261 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Hypergraph Matching Algorithms Tutorial\n", + "\n", + "This tutorial highlights the implementation and usage of several hypergraph matching algorithms as presented in our publication: [Distributed Algorithms for Matching in Hypergraphs](https://arxiv.org/abs/2009.09605v1).\n", + "\n", + "## Algorithms Covered\n", + "- Greedy Matching\n", + "- Iterated Sampling\n", + "- HEDCS Matching\n", + "\n", + "We will demonstrate how to use these algorithms with example hypergraphs and compare their performance." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import hypernetx as hnx\n", + "from hypernetx.classes.hypergraph import Hypergraph\n", + "from hypernetx.algorithms.matching_algorithms import greedy_matching, iterated_sampling, HEDCS_matching\n", + "import random\n", + "import logging\n", + "import time\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Example Hypergraph" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# Example hypergraph data\n", + "hypergraph_data = {\n", + " 0: (1, 2, 3),\n", + " 1: (4, 5, 6),\n", + " 2: (7, 8, 9),\n", + " 3: (1, 4, 7),\n", + " 4: (2, 5, 8),\n", + " 5: (3, 6, 9)\n", + "}\n", + "\n", + "# Creating a Hypergraph\n", + "hypergraph = Hypergraph(hypergraph_data)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Greedy Matching Algorithm\n", + "The Greedy Matching algorithm constructs a random k-partitioning of the hypergraph and finds a maximal matching. \n", + "\n", + "### Parameters:\n", + "- `hypergraph`: The input hypergraph.\n", + "- `k`: The number of partitions to divide the hypergraph into.\n", + "\n", + "### Example Usage:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Greedy Matching Result: [(7, 8, 9), (1, 2, 3), (4, 5, 6)]\n" + ] + } + ], + "source": [ + "k = 3\n", + "greedy_result = greedy_matching(hypergraph, k)\n", + "print(\"Greedy Matching Result:\", greedy_result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Iterated Sampling Algorithm\n", + "The Iterated Sampling algorithm uses sampling to find a maximal matching in a d-uniform hypergraph. 
\n", + "\n", + "### Parameters:\n", + "- `hypergraph`: The input hypergraph.\n", + "- `s`: The amount of memory available to the algorithm; a `MemoryLimitExceededError` is raised if it is too small.\n", + "\n", + "### Example Usage:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Iterated Sampling Result: [[7, 8, 9], [1, 2, 3], [4, 5, 6]]\n" + ] + } + ], + "source": [ + "s = 10\n", + "iterated_result = iterated_sampling(hypergraph, s)\n", + "print(\"Iterated Sampling Result:\", iterated_result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## HEDCS Matching Algorithm\n", + "The HEDCS Matching algorithm constructs a Hyper-Edge Degree Constrained Subgraph (HEDCS) to find a maximal matching. \n", + "\n", + "### Parameters:\n", + "- `hypergraph`: The input hypergraph.\n", + "- `s`: The amount of memory available per machine.\n", + "\n", + "### Example Usage:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "HEDCS Matching Result: [[1, 2, 3], [4, 5, 6], [7, 8, 9]]\n" + ] + } + ], + "source": [ + "hedcs_result = HEDCS_matching(hypergraph, s)\n", + "print(\"HEDCS Matching Result:\", hedcs_result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Performance Comparison\n", + "We will compare the performance of the algorithms on large random hypergraphs." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def generate_random_hypergraph(n, d, m):\n", + " edges = {f'e{i}': random.sample(range(1, n+1), d) for i in range(m)}\n", + " return Hypergraph(edges)\n", + "\n", + "# Generate random hypergraphs of increasing size\n", + "sizes = [100, 200, 300, 400, 500]\n", + "greedy_times = []\n", + "iterated_times = []\n", + "hedcs_times = []\n", + "\n", + "for size in sizes:\n", + " hypergraph = generate_random_hypergraph(size, 3, size)\n", + " \n", + " start_time = time.time()\n", + " greedy_matching(hypergraph, k)\n", + " greedy_times.append(time.time() - start_time)\n", + " \n", + " start_time = time.time()\n", + " iterated_sampling(hypergraph, s, max_iterations = 500)\n", + " iterated_times.append(time.time() - start_time)\n", + " \n", + " start_time = time.time()\n", + " HEDCS_matching(hypergraph, s)\n", + " hedcs_times.append(time.time() - start_time)\n", + "\n", + "# Plot the results\n", + "plt.figure(figsize=(10, 6))\n", + "plt.plot(sizes, greedy_times, label='Greedy Matching')\n", + "plt.plot(sizes, iterated_times, label='Iterated Sampling')\n", + "plt.plot(sizes, hedcs_times, label='HEDCS Matching')\n", + "plt.xlabel('Hypergraph Size')\n", + "plt.ylabel('Time (seconds)')\n", + "plt.title('Performance Comparison of Hypergraph Matching Algorithms')\n", + "plt.legend()\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Conclusion\n", + "In this tutorial, we demonstrated the implementation and usage of several hypergraph matching algorithms. We also compared their performance on random hypergraphs of increasing size.\n", + "\n", + "For more details, please refer to our publication: [Distributed Algorithms for Matching in Hypergraphs](https://arxiv.org/abs/2009.09605v1)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}