Pull request #220: Hif
Merge in HYP/hypernetx from hif to master

* commit '7a651fba73b67622b40c612e9814d255a3d656ba':
  bump: version 2.3.9 → 2.3.10
  add requests via poetry
  Apply Pre-commit fixes
  updated hif.py with docs
  updated toy and hif.py for HIF collaboration
  updated hypernetx/__init__ to load hif file
  adde hif.py
  updated init to include hif code
  fixed bug in property store lines 373 and 392 misassigned misc_properties_col
  removed redundancy from misc_properties
  Created modules branch in toctree.  cost work
  temporarily storing hif to from
brendapraggastis authored and ryandanehy committed Dec 18, 2024
2 parents d641ff0 + 7a651fb commit af6c3f0
Showing 11 changed files with 290 additions and 41 deletions.
2 changes: 1 addition & 1 deletion .cz.toml
@@ -1,6 +1,6 @@
[tool.commitizen]
name = "cz_conventional_commits"
version = "2.3.9"
version = "2.3.10"
version_provider = "poetry"
version_files = [
"pyproject.toml",
File renamed without changes.
35 changes: 17 additions & 18 deletions docs/source/algorithms/matching_algorithms.rst
@@ -35,24 +35,23 @@ These algorithms are crucial for applications that require scalable parallel processing

Usage Example
-------------
-Below is an example of how to use the matching algorithms module.
-
-```python
-from hypernetx.algorithms import matching_algorithms as ma
-# Example hypergraph data
-hypergraph = ...  # Assume this is a d-uniform hypergraph
-# Compute a matching using the O(d²)-approximation algorithm
-matching = ma.matching_approximation_d_squared(hypergraph)
-# Compute a matching using the d-approximation algorithm
-matching_d = ma.matching_approximation_d(hypergraph)
-# Compute a matching using the d(d−1 + 1/d)²-approximation algorithm
-matching_d_squared = ma.matching_approximation_dd(hypergraph)
-print(matching, matching_d, matching_d_squared)
+Below is an example of how to use the matching algorithms module.::
+
+    from hypernetx.algorithms import matching_algorithms as ma
+    # Example hypergraph data
+    hypergraph = ...  # Assume this is a d-uniform hypergraph
+    # Compute a matching using the O(d²)-approximation algorithm
+    matching = ma.matching_approximation_d_squared(hypergraph)
+    # Compute a matching using the d-approximation algorithm
+    matching_d = ma.matching_approximation_d(hypergraph)
+    # Compute a matching using the d(d−1 + 1/d)²-approximation algorithm
+    matching_d_squared = ma.matching_approximation_dd(hypergraph)
+    print(matching, matching_d, matching_d_squared)


References
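For a concrete starting point, the `hypergraph = ...` placeholder in the example above can be replaced by a small explicit system. This is a sketch only: the 3-uniform toy edges are invented here, and the function name is taken from the documentation above:

    import hypernetx as hnx
    from hypernetx.algorithms import matching_algorithms as ma

    # A toy 3-uniform hypergraph: every edge contains exactly d = 3 nodes.
    H = hnx.Hypergraph({"e1": [1, 2, 3], "e2": [3, 4, 5], "e3": [5, 6, 7]})

    # e1 and e3 are disjoint, so a maximum matching here has size 2.
    matching = ma.matching_approximation_d_squared(H)
    print(matching)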
2 changes: 1 addition & 1 deletion docs/source/conf.py
@@ -19,7 +19,7 @@
import os


__version__ = "2.3.9"
__version__ = "2.3.10"


# If extensions (or modules to document with autodoc) are in another directory,
2 changes: 1 addition & 1 deletion docs/source/index.rst
@@ -59,7 +59,7 @@ Contents
   A Gentle Introduction to Hypergraph Mathematics <hypergraph101>
   Hypergraph Constructors <hypconstructors>
   Visualization Widget <widget>
-   Algorithms: Modularity and Clustering <modularity>
+   Modules <modules>
   Publications <publications>
   Contributors Guide <contributions>
   license
18 changes: 18 additions & 0 deletions docs/source/modules.rst
@@ -0,0 +1,18 @@
.. _Modules:

********************
Modules
********************

This page provides additional documentation for various contributions to HyperNetX through modules.
We use the term module to describe an advanced method for studying hypergraphs.
Each module includes a descriptive document here as well as a notebook showcasing its functionality.
Tutorial notebooks can be found for each module under the `advanced tutorials <https://github.com/pnnl/HyperNetX/tree/master/tutorials/advanced>`_ folder on GitHub.

**Modules with additional documentation:**

.. toctree::
   :maxdepth: 1

   Hypergraph Modularity and Clustering <algorithms/hypergraph_modularity_and_clustering>
   Hypergraph Matching <algorithms/matching_algorithms>
4 changes: 2 additions & 2 deletions hypernetx/__init__.py
@@ -3,12 +3,12 @@
    HyperNetXError,
    HyperNetXNotImplementedError,
)
-from hypernetx.read_write import to_pickle, load_from_pickle
+from hypernetx.hif import to_hif, from_hif
from hypernetx.classes import *
from hypernetx.reports import *
from hypernetx.drawing import *
from hypernetx.algorithms import *
from hypernetx.utils import *
from hypernetx.utils.toys import *

__version__ = "2.3.9"
__version__ = "2.3.10"
195 changes: 195 additions & 0 deletions hypernetx/hif.py
@@ -0,0 +1,195 @@
# Copyright © 2024 Battelle Memorial Institute
# All rights reserved.

import hypernetx as hnx
import pandas as pd
import json
import fastjsonschema
import requests
from copy import deepcopy
from .exception import HyperNetXError

schema_url = "https://raw.githubusercontent.com/pszufe/HIF_validators/main/schemas/hif_schema_v0.1.0.json"
resp = requests.get(schema_url)
schema = json.loads(resp.text)
validator = fastjsonschema.compile(schema)


def normalize_dataframe(df):
    """
    Moves common attributes into misc_properties for translating into HIF.

    Parameters
    ----------
    df : pd.DataFrame
        HypergraphView.dataframe

    Returns
    -------
    pd.DataFrame
        allowed columns are limited to HIF keys
    """
    default_cols = (
        ["weight"]
        + list(set(df.columns).intersection(["direction"]))
        + ["misc_properties"]
    )
    cols = list(set(df.columns).difference(default_cols))
    dfdict = df[cols].T.to_dict()
    newdf = df[default_cols]
    for uid in newdf.index:
        newdf.loc[uid]["misc_properties"].update(dfdict[uid])
    return newdf.fillna("nil")
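# For example, a property frame with columns ["weight", "misc_properties",
# "color"] comes back with only the HIF-allowed columns ["weight",
# "misc_properties"], each row's "color" value having been folded into that
# row's misc_properties dict ("color" is an illustrative extra column).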


def to_hif(hg, filename=None, network_type="undirected", metadata=None):
    """
    Returns a dictionary object valid for the HIF JSON schema

    Parameters
    ----------
    hg : hnx.Hypergraph
    filename : str, optional
        filepath where json object is to be stored, by default None
    network_type : str, optional
        One of 'undirected', 'directed', 'asc', by default 'undirected'
    metadata : dict, optional
        Additional information to store, by default None

    Returns
    -------
    hif : dict
        format is defined by HIF schema
    """
    hyp_objs = ["nodes", "edges", "incidences"]
    defaults = {
        part: dict(getattr(hg, part).property_store._defaults) for part in hyp_objs
    }
    for part in hyp_objs:
        misc_properties = defaults[part].pop("misc_properties", {})
        defaults[part]["attrs"] = dict(misc_properties)

    # Translate each property dataframe into a list of HIF records.
    incj = deepcopy(hg.incidences.to_dataframe)
    incj.index.names = ["edge", "node"]
    incj = normalize_dataframe(incj)
    incj = incj.rename(columns={"misc_properties": "attrs"})
    incj = incj.reset_index().to_dict(orient="records")

    edgj = deepcopy(hg.edges.to_dataframe)
    edgj.index.names = ["edge"]
    edgj = normalize_dataframe(edgj)
    edgj = edgj.rename(columns={"misc_properties": "attrs"})
    edgj = edgj.reset_index().to_dict(orient="records")

    nodj = deepcopy(hg.nodes.to_dataframe)
    nodj.index.names = ["node"]
    nodj = normalize_dataframe(nodj)
    nodj = nodj.rename(columns={"misc_properties": "attrs"})
    nodj = nodj.reset_index().to_dict(orient="records")

    if isinstance(metadata, dict):
        # dict.update mutates in place and returns None, so do not reassign.
        metadata.update({"default_attrs": defaults})
    else:
        metadata = {"default_attrs": defaults}
    if hg.name is not None:
        metadata["name"] = hg.name

    hif = {
        "edges": edgj,
        "nodes": nodj,
        "incidences": incj,
        "network-type": network_type,
        "metadata": metadata,
    }
    try:
        validator(hif)
        if filename is not None:
            with open(filename, "w") as f:
                json.dump(hif, f)
        return hif
    except Exception as ex:
        raise HyperNetXError(ex)


def from_hif(hif=None, filename=None):
    """
    Reads HIF formatted string or dictionary and returns corresponding
    hnx.Hypergraph

    Parameters
    ----------
    hif : dict, optional
        Useful if file is read by json and inspected before turning into a hypergraph,
        by default None
    filename : str, optional
        Full path to location of HIF formatted JSON in storage,
        by default None

    Returns
    -------
    hnx.Hypergraph
    """
    if hif is not None:
        try:
            validator(hif)
        except Exception as ex:
            raise HyperNetXError(ex)
    elif filename is not None:
        with open(filename, "r") as f:
            hif = json.load(f)
        try:
            validator(hif)
        except Exception as ex:
            raise HyperNetXError(ex)
    else:
        print("No data given")
        return None

    mkdd = lambda: {"weight": 1, "attrs": {}}
    hifex = deepcopy(hif)
    parts = {
        part: deepcopy(pd.DataFrame(hifex.get(part, {})))
        for part in ["nodes", "edges", "incidences"]
    }
    metadata = hifex.get("metadata", {})
    defaults = metadata.get("default_attrs", {})
    defaults = {part: defaults.get(part, mkdd()) for part in parts}
    default_weights = {part: defaults[part].get("weight", 1) for part in parts}
    for part in parts:
        if len(parts[part]) == 0:
            continue
        thispart = parts[part]
        d = deepcopy(defaults[part])
        dkeys = [k for k in d.keys() if k not in ["weight", "attrs"]]
        if len(dkeys) > 0:
            # Promote defaulted attributes out of attrs into their own columns.
            for attr in dkeys:
                thispart[attr] = [
                    row.attrs.pop(attr, d[attr]) for row in thispart.itertuples()
                ]
    hyp_objects = dict()
    for part in ["nodes", "edges"]:
        if len(parts[part]) > 0:
            uid = part[:-1]
            cols = [uid] + list(set(parts[part].columns).difference([uid]))
            hyp_objects[part] = parts[part][cols]
        else:
            hyp_objects[part] = None
    cols = ["edge", "node"] + list(
        set(parts["incidences"].columns).difference(["edge", "node"])
    )
    incidences = parts["incidences"][cols]
    name = metadata.get("name", None)
    return hnx.Hypergraph(
        incidences,
        default_cell_weight=default_weights["incidences"],
        misc_cell_properties_col="attrs",
        node_properties=hyp_objects["nodes"],
        default_edge_weight=default_weights["edges"],
        edge_properties=hyp_objects["edges"],
        default_node_weight=default_weights["nodes"],
        misc_properties_col="attrs",
        name=name,
    )
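For reference, a minimal round trip through the new module might look like the following sketch (the toy edge dict and filename are illustrative, and the schema fetch at import time assumes network access):

    import hypernetx as hnx

    # Build a small hypergraph from a dict of edge -> node lists.
    H = hnx.Hypergraph({"e1": ["a", "b"], "e2": ["b", "c"]}, name="toy")

    # Serialize to a HIF-schema dict, writing a copy to disk as JSON.
    hif = hnx.to_hif(H, filename="toy_hif.json")

    # Read it back into an equivalent Hypergraph.
    H2 = hnx.from_hif(filename="toy_hif.json")
    print(H2.name, sorted(H2.edges), sorted(H2.nodes))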
17 changes: 0 additions & 17 deletions hypernetx/read_write.py

This file was deleted.

53 changes: 53 additions & 0 deletions hypernetx/utils/toys/lesmis.py
@@ -2,6 +2,7 @@
# All rights reserved.

import pandas as pd
+import numpy as np
from itertools import islice, chain, repeat

import matplotlib.pyplot as plt
@@ -42,6 +43,32 @@ def __init__(self):
    def dnames(self):
        return self.df_names.set_index("Symbol")

+    def hypergraph_example(self):
+
+        names = self.df_names
+        scenes = self.df_scenes
+        scenes["edge"] = [
+            ".".join([str(scenes.loc[idx][col]) for col in scenes.columns[:-2]])
+            for idx in scenes.index
+        ]
+        scenes["node"] = scenes["Characters"]
+        df = scenes[["edge", "node"]]
+        cell_weights = df.groupby(["edge"]).count().to_dict()["node"]
+        df["weight"] = df.edge.map(lambda e: np.round(1 / cell_weights.get(e, 1), 2))
+        nprops = names
+        nprops["weight"] = np.round(np.random.uniform(0, 1, len(names)), 2)
+        lm = hnx.Hypergraph(
+            df,
+            cell_weight_col="weight",
+            node_properties=nprops,
+            node_weight_prop_col="weight",
+            name="LesMis example from HNX",
+        )
+        lm.nodes["JV"].job = "mayor"
+        lm.nodes["MY"].avocation = "to be kind"
+        lm.nodes["BS"].vocation = "explorer"
+        return lm


def lesmis_hypergraph_from_df(df, by="Chapter", on="Characters"):
    cols = df.columns.tolist()
@@ -54,6 +81,32 @@ def lesmis_hypergraph_from_df(df, by="Chapter", on="Characters"):
    )


+def lesmis_hypergraph():
+    lesmis = LesMis()
+    names = lesmis.df_names
+    scenes = lesmis.df_scenes
+    scenes["edge"] = [
+        ".".join([str(scenes.loc[idx][col]) for col in scenes.columns[:-2]])
+        for idx in scenes.index
+    ]
+    scenes["node"] = scenes["Characters"]
+    df = scenes[["edge", "node"]]
+    cell_weights = df.groupby(["edge"]).count().to_dict()["node"]
+    df["weight"] = df.edge.map(lambda e: np.round(1 / cell_weights.get(e, 1), 2))
+    nprops = names
+    nprops["weight"] = np.round(np.random.uniform(0, 1, len(names)), 2)
+    lm = hnx.Hypergraph(
+        df,
+        cell_weight_col="weight",
+        node_properties=nprops,
+        node_weight_prop_col="weight",
+    )
+    lm.nodes["JV"].job = "mayor"
+    lm.nodes["MY"].avocation = "to be kind"
+    lm.nodes["BS"].vocation = "explorer"
+    return lm


def book_tour(df, xlabel="Book", ylabel="Volume", s=3.5):
    """
    Constructs a visualization of hypergraphs stored in an indexed
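For reference, the new toy constructor can be exercised like this (a sketch: it assumes `LesMis` is re-exported from `hypernetx.utils.toys`, as the package `__init__` suggests, and the attribute-style reads mirror the assignments made in the code above):

    from hypernetx.utils.toys import LesMis

    # Build the weighted LesMis example hypergraph with node properties.
    lm = LesMis().hypergraph_example()
    print(lm.nodes["JV"].job)  # expected: 'mayor'
    print(len(lm.edges), len(lm.nodes))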
