Skip to content

Commit

Permalink
[ENH] Add functional submodule (#75)
Browse files Browse the repository at this point in the history
* Add functional submodule
* update lock file

---------

Signed-off-by: Adam Li <[email protected]>
  • Loading branch information
adam2392 authored May 18, 2023
1 parent 8658c5b commit 3de911e
Show file tree
Hide file tree
Showing 7 changed files with 154 additions and 4 deletions.
11 changes: 11 additions & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,17 @@ a SCM and their data starting from the causal graph.
simulate.simulate_data_from_var
simulate.simulate_var_process_from_summary_graph

Converting graphs to functional models
======================================
An experimental submodule for converting graphs to functional models, such as
linear Gaussian structural equation models (SEMs).

.. currentmodule:: pywhy_graphs.functional

.. autosummary::
:toctree: generated/

make_graph_linear_gaussian

Visualization of causal graphs
==============================
Expand Down
1 change: 1 addition & 0 deletions docs/whats_new/v0.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ Changelog
- |Feature| Implement export/import functions to go to/from pywhy-graphs to pcalg and tetrad, by `Adam Li`_ (:pr:`60`)
- |Feature| Implement export/import functions to go to/from pywhy-graphs to ananke-causal, by `Jaron Lee`_ (:pr:`63`)
- |Feature| Implement pre-commit hooks for development, by `Jaron Lee`_ (:pr:`68`)
- |Feature| Implement a new submodule for converting graphs to a functional model, with :func:`pywhy_graphs.functional.make_graph_linear_gaussian`, by `Adam Li`_ (:pr:`75`)

Code and Documentation Contributors
-----------------------------------
Expand Down
8 changes: 4 additions & 4 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pywhy_graphs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@
from . import classes
from . import networkx
from . import simulate
from . import functional
1 change: 1 addition & 0 deletions pywhy_graphs/functional/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .linear import make_graph_linear_gaussian
104 changes: 104 additions & 0 deletions pywhy_graphs/functional/linear.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
from typing import Callable, List, Optional

import networkx as nx
import numpy as np


def _resolve_limits(lims, default, name):
    """Return ``lims``, or ``default`` if ``lims`` is None, checking it has two entries."""
    if lims is None:
        return default
    if len(lims) != 2:
        raise ValueError(f"{name} must be a list of length 2.")
    return lims


def make_graph_linear_gaussian(
    G: nx.DiGraph,
    node_mean_lims: Optional[List[float]] = None,
    node_std_lims: Optional[List[float]] = None,
    edge_functions: Optional[List[Callable[[float], float]]] = None,
    edge_weight_lims: Optional[List[float]] = None,
    random_state=None,
) -> nx.DiGraph:
    r"""Convert an existing DAG to a linear Gaussian graphical model.

    Every node gets a Gaussian noise term whose mean and standard deviation
    are drawn uniformly at random between the limits given by the input
    parameters. Every edge gets a weight and a function, which are combined
    additively over the parents of a node.

    For node :math:`X_i`, we have:

    .. math::

        X_i = \sum_{j \in parents} w_j f_j(X_j) + \epsilon_i

    where:

    - :math:`\epsilon_i \sim N(\mu_i, \sigma_i)`, where :math:`\mu_i` is sampled
      uniformly at random from `node_mean_lims` and :math:`\sigma_i` is sampled
      uniformly at random from `node_std_lims`.
    - :math:`w_j \sim U(\text{edge_weight_lims})`
    - :math:`f_j` is a function sampled uniformly at random
      from `edge_functions`

    Parameters
    ----------
    G : NetworkX DiGraph
        The graph to annotate. The graph is modified in-place: the sampled
        weights, functions and noise parameters are stored as node attributes.
    node_mean_lims : Optional[List[float]], optional
        The lower and upper bounds of the mean of the Gaussian random variable, by default None,
        which defaults to a mean of 0.
    node_std_lims : Optional[List[float]], optional
        The lower and upper bounds of the std of the Gaussian random variable, by default None,
        which defaults to a std of 1.
    edge_functions : Optional[List[Callable[[float], float]]], optional
        The set of edge functions that take in an iid sample from the parent and compute
        a transformation (possibly nonlinear), such as ``(lambda x: x**2, lambda x: x)``,
        by default None, which defaults to the identity function ``lambda x: x``.
        Must not be empty.
    edge_weight_lims : Optional[List[float]], optional
        The lower and upper bounds of the edge weight, by default None,
        which defaults to a weight of 1.
    random_state : int, optional
        Seed passed to :func:`numpy.random.default_rng`, by default None.

    Returns
    -------
    G : NetworkX DiGraph
        The same graph object that was passed in. Each node has the attribute
        ``'parent_functions'``, a dict mapping each parent to
        ``{"weight": ..., "func": ...}``, and the attribute
        ``'gaussian_noise_function'``, a dict ``{"mean": ..., "std": ...}``.
        Moreover the graph attribute ``'linear_gaussian'`` is set to ``True``.

    Raises
    ------
    ValueError
        If ``G`` is not a directed acyclic graph, if any of the ``*_lims``
        arguments does not have exactly two entries, or if ``edge_functions``
        is empty.
    """
    if not nx.is_directed_acyclic_graph(G):
        raise ValueError("The input graph must be a DAG.")
    rng = np.random.default_rng(random_state)

    # Validate everything before touching G so a bad argument never leaves
    # the graph partially annotated.
    mean_low, mean_high = _resolve_limits(node_mean_lims, [0, 0], "node_mean_lims")
    std_low, std_high = _resolve_limits(node_std_lims, [1, 1], "node_std_lims")
    if edge_functions is None:
        edge_functions = [lambda x: x]
    elif len(edge_functions) == 0:
        # Fail early with a clear message instead of deep inside rng.choice.
        raise ValueError("edge_functions must contain at least one function.")
    weight_low, weight_high = _resolve_limits(edge_weight_lims, [1, 1], "edge_weight_lims")

    parent_functions = {}
    noise_functions = {}

    # Nodes are visited in topological order and parents in sorted order so
    # that the sequence of random draws is reproducible for a given seed.
    for node in nx.topological_sort(G):
        # sample the noise parameters of this node
        mean = rng.uniform(low=mean_low, high=mean_high)
        std = rng.uniform(low=std_low, high=std_high)
        noise_functions[node] = {"mean": mean, "std": std}

        # sample a weight, then an edge function, for each parent
        node_function = {}
        for parent in sorted(G.predecessors(node)):
            weight = rng.uniform(low=weight_low, high=weight_high)
            func = rng.choice(edge_functions)
            node_function[parent] = {"weight": weight, "func": func}
        parent_functions[node] = node_function

    # Write all attributes at once, only after sampling succeeded for every node.
    nx.set_node_attributes(G, parent_functions, "parent_functions")
    nx.set_node_attributes(G, noise_functions, "gaussian_noise_function")
    G.graph["linear_gaussian"] = True
    return G
32 changes: 32 additions & 0 deletions pywhy_graphs/functional/tests/test_linear.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import networkx as nx
import pytest

from pywhy_graphs.functional import make_graph_linear_gaussian
from pywhy_graphs.simulate import simulate_random_er_dag


def test_make_linear_gaussian_graph():
    """Every node of the converted graph carries both functional attributes."""
    dag = simulate_random_er_dag(n_nodes=5, seed=12345, ensure_acyclic=True)
    dag = make_graph_linear_gaussian(dag, random_state=12345)

    all_nodes = set(dag.nodes)
    with_parent_functions = nx.get_node_attributes(dag, "parent_functions")
    with_noise_function = nx.get_node_attributes(dag, "gaussian_noise_function")

    # each attribute must be present on every node of the graph
    assert all_nodes <= set(with_parent_functions)
    assert all_nodes <= set(with_noise_function)


def test_make_linear_gaussian_graph_errors():
    """Malformed limits and cyclic graphs are rejected with a ValueError."""
    dag = simulate_random_er_dag(n_nodes=2, seed=12345, ensure_acyclic=True)

    # each limits argument must have exactly two entries
    for lims_name in ("node_mean_lims", "node_std_lims", "edge_weight_lims"):
        with pytest.raises(ValueError, match="must be a list of length 2."):
            make_graph_linear_gaussian(dag, random_state=12345, **{lims_name: [0]})

    # a directed cycle is not a DAG
    cyclic = nx.cycle_graph(4, create_using=nx.DiGraph)
    with pytest.raises(ValueError, match="The input graph must be a DAG."):
        make_graph_linear_gaussian(cyclic, random_state=12345)

0 comments on commit 3de911e

Please sign in to comment.