From 5bab9d705d3dbc1cb91df3c60aa7c3c995f7e3d3 Mon Sep 17 00:00:00 2001 From: Brenda Praggastis Date: Mon, 29 Apr 2024 15:57:44 -0700 Subject: [PATCH 1/2] added items property to hyp_view and intersection and union to hypergraph --- hypernetx/classes/hyp_view.py | 4 ++ hypernetx/classes/hypergraph.py | 76 +++++++++++++++++++++++++-------- 2 files changed, 63 insertions(+), 17 deletions(-) diff --git a/hypernetx/classes/hyp_view.py b/hypernetx/classes/hyp_view.py index 61d2731c..38f66519 100644 --- a/hypernetx/classes/hyp_view.py +++ b/hypernetx/classes/hyp_view.py @@ -63,6 +63,10 @@ def __init__(self, incidence_store, level, property_store=None): ### returns weight 1 on every call for a weight ### and empty properties otherwise. + @property + def items(self): + return set(self._items) + @property def level(self): return self._level diff --git a/hypernetx/classes/hypergraph.py b/hypernetx/classes/hypergraph.py index ffc34ce5..b4da31c4 100644 --- a/hypernetx/classes/hypergraph.py +++ b/hypernetx/classes/hypergraph.py @@ -1138,11 +1138,11 @@ def _construct_hyp_from_stores( h._E = HypergraphView(incidence_store, 2, incidence_ps) if edge_ps is None: - edge_ps = PropertyStore(df.edges.to_dataframe) + edge_ps = PropertyStore(self.edges.to_dataframe) h._edges = HypergraphView(incidence_store, 0, edge_ps) if node_ps is None: - node_ps = PropertyStore(df.nodes.to_dataframe) + node_ps = PropertyStore(self.nodes.to_dataframe) h._nodes = HypergraphView(incidence_store, 1, node_ps) h._set_default_state() @@ -2519,8 +2519,8 @@ def __sub__(self, other): def sum(self, other): """ - Concatenate incidences from two hypergraphs, removing duplicates and - dropping duplicate property data in the order of addition. + Hypergraph obtained by joining incidences from self and other. + Removes duplicates and uses properties of self. Parameters ---------- @@ -2531,22 +2531,28 @@ def sum(self, other): Hypergraph """ - df = self.to_dataframe - odf = other.to_dataframe - ndf = pd.concat([df, odf]).groupby(["edges", "nodes"]).agg("first") + df = self.incidences.to_dataframe + odf = other.incidences.to_dataframe + ndf = pd.concat([df, odf]) + ndf = ndf[~ndf.index.duplicated(keep="first")] + edf = self.edges.to_dataframe oedf = other.edges.to_dataframe - nedf = pd.concat([edf, oedf]).groupby("uid").agg("first") + nedf = pd.concat([edf, oedf]) + nedf = nedf[~nedf.index.duplicated(keep="first")] + nddf = self.nodes.to_dataframe onddf = other.nodes.to_dataframe - nnddf = pd.concat([nddf, onddf]).groupby("uid").agg("first") + nnddf = pd.concat([nddf, onddf]) + nnddf[~nnddf.index.duplicated(keep="first")] + return self._construct_hyp_from_stores( ndf, edge_ps=PropertyStore(nedf), node_ps=PropertyStore(nnddf) ) - def difference(self, other): + def difference(self, other, name=None): """ - Remove incidence pairs from self that belong to other. + Hypergraph obtained by restricting to incidences in self but not in other. Parameters ---------- @@ -2557,12 +2563,48 @@ def difference(self, other): : Hypergraph """ - df = self.incidences.properties - odf = other.incidences.properties - ndf = df.loc[~df.index.isin(odf.index.tolist())] - return self._construct_hyp_from_stores( - ndf, edge_ps=self.edges._property_store, node_ps=self.nodes._property_store - ) + ndx = list(self.incidences.items.difference(other.incidences.items)) + ndf = self.incidences.to_dataframe.loc[ndx] + return self._construct_hyp_from_stores(ndf, name=name) + + def intersection(self, other, name=None): + """ + The hypergraph gotten by restricting to incidence pairs contained in + both self and other. Properties inherited from self. + + Parameters + ---------- + other : Hypergraph + name : str, optional + by default None + + Returns + ------- + : Hypergraph + + """ + ndx = list(self.incidences.items.intersection(other.incidences.items)) + ndf = self.incidences.to_dataframe.loc[ndx] + return self._construct_hyp_from_stores(ndf, name=name) + + def union(self, other, name=None): + """ + The hypergraph gotten by joining incidence pairs contained in + self and other. Duplicates removed. Properties inherited from self. + Same as sum. + + Parameters + ---------- + other : Hypergraph + name : str, optional + by default None + + Returns + ------- + : Hypergraph + + """ + return self.sum(other) def _agg_rows(df, groupby, rule_dict=None): From b178da64e06a3bc4a2f5cad61ed4361b8f2bbf6b Mon Sep 17 00:00:00 2001 From: Mark Bonicillo Date: Tue, 30 Apr 2024 11:18:47 -0700 Subject: [PATCH 2/2] Fix and refactor sum method; add tests --- hypernetx/classes/hyp_view.py | 1 - hypernetx/classes/hypergraph.py | 31 ++++++++++++++++--------------- tests/classes/test_hypergraph.py | 12 ++++++++++-- 3 files changed, 26 insertions(+), 18 deletions(-) diff --git a/hypernetx/classes/hyp_view.py b/hypernetx/classes/hyp_view.py index c0568810..38f66519 100644 --- a/hypernetx/classes/hyp_view.py +++ b/hypernetx/classes/hyp_view.py @@ -102,7 +102,6 @@ def to_dataframe(self): non_user_defined_properties = pd.DataFrame( index=non_user_defined_items, data=default_data ) - non_user_defined_properties.index.rename("uid", inplace=True) # Combine user-defined and non-user-defined properties into one dataframe return pd.concat([df, non_user_defined_properties]) diff --git a/hypernetx/classes/hypergraph.py b/hypernetx/classes/hypergraph.py index 5087a43b..facd3c1d 100644 --- a/hypernetx/classes/hypergraph.py +++ b/hypernetx/classes/hypergraph.py @@ -2551,25 +2551,26 @@ def sum(self, other): Hypergraph """ - df = self.incidences.to_dataframe - odf = other.incidences.to_dataframe - ndf = pd.concat([df, odf]) - ndf = ndf[~ndf.index.duplicated(keep="first")] - - edf = self.edges.to_dataframe - oedf = other.edges.to_dataframe - nedf = pd.concat([edf, oedf]) - nedf = nedf[~nedf.index.duplicated(keep="first")] - - nddf = self.nodes.to_dataframe - onddf = other.nodes.to_dataframe - nnddf = pd.concat([nddf, onddf]) - nnddf[~nnddf.index.duplicated(keep="first")] + incidence_df = self._combine_properties_dataframes( + self.incidences.to_dataframe, other.incidences.to_dataframe + ) + edges_data = self._combine_properties_dataframes( + self.edges.to_dataframe, other.edges.to_dataframe + ) + nodes_data = self._combine_properties_dataframes( + self.nodes.to_dataframe, other.nodes.to_dataframe + ) return self._construct_hyp_from_stores( - ndf, edge_ps=PropertyStore(nedf), node_ps=PropertyStore(nnddf) + incidence_df, + edge_ps=PropertyStore(edges_data), + node_ps=PropertyStore(nodes_data), ) + def _combine_properties_dataframes(self, df1, df2): + df = pd.concat([df1, df2]) + return df[~df.index.duplicated(keep="first")] + def difference(self, other, name=None): """ Hypergraph obtained by restricting to incidences in self but not in other. diff --git a/tests/classes/test_hypergraph.py b/tests/classes/test_hypergraph.py index 1554c896..c6537a34 100644 --- a/tests/classes/test_hypergraph.py +++ b/tests/classes/test_hypergraph.py @@ -1155,6 +1155,7 @@ def test_sum_hypergraph_with_dupe_hypergraph(sevenbysix, sevenbysix_dupes): hg = Hypergraph(sevenbysix.edgedict) hg_dupes = Hypergraph(sevenbysix_dupes.edgedict) + # Case: HDuplicate + H # add almost duplicate hypergraph to hypergraph new_hg = hg.sum(hg_dupes) @@ -1169,14 +1170,21 @@ def test_sum_hypergraph_with_dupe_hypergraph(sevenbysix, sevenbysix_dupes): "R": ["A", "E", "F"], "S": ["A", "K", "T2", "V"], } - assert new_hg.incidences.incidence_dict == expected_incidences + actual_incidences = new_hg.incidence_dict + for expected_edge, expected_nodes in expected_incidences.items(): + assert expected_edge in actual_incidences + assert all(node in actual_incidences[expected_edge] for node in expected_nodes) + # Case: H + HDuplicate # add hypergraph to almost duplicate new_hg = hg_dupes.sum(hg) assert new_hg.shape == (len(sevenbysix.nodes) + 1, len(sevenbysix.edges) + 1) # check for new incidences - assert new_hg.incidences.incidence_dict == expected_incidences + actual_incidences = new_hg.incidences.incidence_dict + for expected_edge, expected_nodes in expected_incidences.items(): + assert expected_edge in actual_incidences + assert all(node in actual_incidences[expected_edge] for node in expected_nodes) @pytest.mark.parametrize(