upload example code for model implementation
frinkleko authored and you-n-g committed Jun 18, 2024
1 parent 8f99778 commit f5ac317
Showing 3 changed files with 282 additions and 0 deletions.
63 changes: 63 additions & 0 deletions rdagent/model_implementation/evaluator.py
@@ -0,0 +1,63 @@
import torch
import numpy as np


def shape_evaluator(target, prediction):
    """Score how closely the prediction's shape matches the target's shape."""
    if target is None or prediction is None:
        return None, 0
    tar_shape = target.shape
    pre_shape = prediction.shape

    # Per-dimension absolute size difference; missing dimensions count as size 0.
    diff = []
    for i in range(max(len(tar_shape), len(pre_shape))):
        dim_tar = tar_shape[i] if i < len(tar_shape) else 0
        dim_pre = pre_shape[i] if i < len(pre_shape) else 0
        diff.append(abs(dim_tar - dim_pre))

    # Squash the mean difference into (0, 0.5]; 0.5 means the shapes match exactly.
    metric = 1 / (np.exp(np.mean(diff)) + 1)
    return diff, metric


def reshape_tensor(original_tensor, target_shape):
    """Zero-pad original_tensor up to target_shape, keeping its values at index 0 of each dim."""
    new_tensor = torch.zeros(target_shape)
    # Narrow a view down to the original tensor's extent and copy the values into it,
    # while keeping the reference to the full zero-padded tensor for the return value.
    view = new_tensor
    for i, dim in enumerate(original_tensor.shape):
        view = view.narrow(i, 0, dim)
    view.copy_(original_tensor)

    return new_tensor


def value_evaluator(target, prediction):
    if target is None or prediction is None:
        return None, 0
    tar_shape = target.shape
    pre_shape = prediction.shape

    # Determine the common shape of the padded tensors
    dims = [
        max(s1, s2)
        for s1, s2 in zip(
            tar_shape + (1,) * (len(pre_shape) - len(tar_shape)),
            pre_shape + (1,) * (len(tar_shape) - len(pre_shape)),
        )
    ]
    # Append trailing singleton dimensions so both tensors have the same rank
    target = target.reshape(
        *tar_shape, *(1,) * (max(len(tar_shape), len(pre_shape)) - len(tar_shape))
    )
    prediction = prediction.reshape(
        *pre_shape, *(1,) * (max(len(tar_shape), len(pre_shape)) - len(pre_shape))
    )
    # Zero-pad both tensors to the common shape before comparing element-wise
    target_padded = reshape_tensor(target, dims)
    prediction_padded = reshape_tensor(prediction, dims)

    # Calculate the mean absolute difference and squash it into (0, 0.5]
    diff = torch.abs(target_padded - prediction_padded)
    metric = 1 / (1 + np.exp(torch.mean(diff).item()))
    return diff, metric


if __name__ == "__main__":
    tar = torch.rand(4, 5, 5)
    pre = torch.rand(4, 1)
    print(shape_evaluator(tar, pre))
    print(value_evaluator(tar, pre)[1])
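A quick sanity check of the two metrics (an illustrative sketch, not part of the committed files; it assumes it is run from rdagent/model_implementation so that evaluator.py is importable): both metrics squash a mean difference through 1 / (1 + e^x), so identical inputs score 0.5 and the score decays toward 0 as shapes or values diverge.

import torch

from evaluator import shape_evaluator, value_evaluator

t = torch.rand(4, 5, 5)
print(shape_evaluator(t, t)[1])                 # 0.5 -- per-dimension shape diff is [0, 0, 0]
print(value_evaluator(t, t)[1])                 # 0.5 -- element-wise value diff is all zeros
print(shape_evaluator(t, torch.rand(4, 1))[1])  # ~0.05 -- mean shape diff of 3 is penalized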
132 changes: 132 additions & 0 deletions rdagent/model_implementation/gt_code.py
@@ -0,0 +1,132 @@
import math
from typing import Any, Callable, Dict, Optional, Union

import torch
from torch import Tensor
from torch.nn import Parameter

from torch_geometric.nn.conv import GCNConv, MessagePassing
from torch_geometric.nn.inits import zeros
from torch_geometric.nn.resolver import activation_resolver
from torch_geometric.typing import Adj


class AntiSymmetricConv(torch.nn.Module):
r"""The anti-symmetric graph convolutional operator from the
`"Anti-Symmetric DGN: a stable architecture for Deep Graph Networks"
<https://openreview.net/forum?id=J3Y7cgZOOS>`_ paper.
.. math::
\mathbf{x}^{\prime}_i = \mathbf{x}_i + \epsilon \cdot \sigma \left(
(\mathbf{W}-\mathbf{W}^T-\gamma \mathbf{I}) \mathbf{x}_i +
\Phi(\mathbf{X}, \mathcal{N}_i) + \mathbf{b}\right),
where :math:`\Phi(\mathbf{X}, \mathcal{N}_i)` denotes a
:class:`~torch.nn.conv.MessagePassing` layer.
Args:
in_channels (int): Size of each input sample.
phi (MessagePassing, optional): The message passing module
:math:`\Phi`. If set to :obj:`None`, will use a
:class:`~torch_geometric.nn.conv.GCNConv` layer as default.
(default: :obj:`None`)
num_iters (int, optional): The number of times the anti-symmetric deep
graph network operator is called. (default: :obj:`1`)
epsilon (float, optional): The discretization step size
:math:`\epsilon`. (default: :obj:`0.1`)
gamma (float, optional): The strength of the diffusion :math:`\gamma`.
It regulates the stability of the method. (default: :obj:`0.1`)
act (str, optional): The non-linear activation function :math:`\sigma`,
*e.g.*, :obj:`"tanh"` or :obj:`"relu"`. (default: :class:`"tanh"`)
act_kwargs (Dict[str, Any], optional): Arguments passed to the
respective activation function defined by :obj:`act`.
(default: :obj:`None`)
bias (bool, optional): If set to :obj:`False`, the layer will not learn
an additive bias. (default: :obj:`True`)
Shapes:
- **input:**
node features :math:`(|\mathcal{V}|, F_{in})`,
edge indices :math:`(2, |\mathcal{E}|)`,
edge weights :math:`(|\mathcal{E}|)` *(optional)*
- **output:** node features :math:`(|\mathcal{V}|, F_{in})`
"""

    def __init__(
        self,
        in_channels: int,
        phi: Optional[MessagePassing] = None,
        num_iters: int = 1,
        epsilon: float = 0.1,
        gamma: float = 0.1,
        act: Union[str, Callable, None] = "tanh",
        act_kwargs: Optional[Dict[str, Any]] = None,
        bias: bool = True,
    ):
        super().__init__()

        self.in_channels = in_channels
        self.num_iters = num_iters
        self.gamma = gamma
        self.epsilon = epsilon
        self.act = activation_resolver(act, **(act_kwargs or {}))

        if phi is None:
            phi = GCNConv(in_channels, in_channels, bias=False)

        self.W = Parameter(torch.empty(in_channels, in_channels))
        self.register_buffer("eye", torch.eye(in_channels))
        self.phi = phi

        if bias:
            self.bias = Parameter(torch.empty(in_channels))
        else:
            self.register_parameter("bias", None)

        self.reset_parameters()

    def reset_parameters(self):
        r"""Resets all learnable parameters of the module."""
        torch.nn.init.kaiming_uniform_(self.W, a=math.sqrt(5))
        self.phi.reset_parameters()
        zeros(self.bias)

    def forward(self, x: Tensor, edge_index: Adj, *args, **kwargs) -> Tensor:
        r"""Runs the forward pass of the module."""
        antisymmetric_W = self.W - self.W.t() - self.gamma * self.eye

        for _ in range(self.num_iters):
            h = self.phi(x, edge_index, *args, **kwargs)
            h = x @ antisymmetric_W.t() + h

            if self.bias is not None:
                h += self.bias

            if self.act is not None:
                h = self.act(h)

            x = x + self.epsilon * h

        return x

    def __repr__(self) -> str:
        return (
            f"{self.__class__.__name__}("
            f"{self.in_channels}, "
            f"phi={self.phi}, "
            f"num_iters={self.num_iters}, "
            f"epsilon={self.epsilon}, "
            f"gamma={self.gamma})"
        )


if __name__ == "__main__":
    node_features = torch.load("node_features.pt")
    edge_index = torch.load("edge_index.pt")

    # Model instantiation and forward pass
    model = AntiSymmetricConv(in_channels=node_features.size(-1))
    output = model(node_features, edge_index)

    # Save output to a file
    torch.save(output, "gt_output.pt")
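As a side note (an illustrative sketch, not part of the committed files), the phi argument of AntiSymmetricConv accepts any torch_geometric MessagePassing module, so the default GCNConv aggregation can be swapped out. The `from gt_code import ...` path is an assumption about where this file sits on the Python path.

import torch
from torch_geometric.nn import SAGEConv

from gt_code import AntiSymmetricConv  # assumed import path for the class defined above

x = torch.randn(1000, 128)
edge_index = torch.randint(0, 1000, (2, 2000))

# Swap the default GCNConv for SAGEConv and unroll three iterations of the operator.
model = AntiSymmetricConv(in_channels=128, phi=SAGEConv(128, 128), num_iters=3)
out = model(x, edge_index)
print(out.shape)  # torch.Size([1000, 128]) -- the feature dimension is preserved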
87 changes: 87 additions & 0 deletions rdagent/model_implementation/main.py
@@ -0,0 +1,87 @@
import os

import torch
from dotenv import load_dotenv

from rdagent.oai.llm_utils import APIBackend

# Randomly generate an input graph (node_feature and edge_index) for testing:
# 1000 nodes, 128-dim node features, 2000 edges.

assert load_dotenv()
formula_info = {
    "name": "Anti-Symmetric Deep Graph Network (A-DGN)",
    "description": "A framework for stable and non-dissipative DGN design. It ensures long-range information preservation between nodes and prevents gradient vanishing or explosion during training.",
    "formulation": "x_u^{(l)} = x_u^{(l-1)} + \\epsilon \\sigma \\left( W^T x_u^{(l-1)} + \\Phi(X^{(l-1)}, N_u) + b \\right)",
    "variables": {
        "x_u^{(l)}": "The state of node u at layer l",
        "\\epsilon": "The step size in the Euler discretization",
        "\\sigma": "A monotonically non-decreasing activation function",
        "W": "An anti-symmetric weight matrix",
        "X^{(l-1)}": "The node feature matrix at layer l-1",
        "N_u": "The set of neighbors of node u",
        "b": "A bias vector",
    },
}

system_prompt = "You are an assistant whose job is to answer the user's question."
user_prompt = "With the following given information, write Python code using pytorch and torch_geometric to implement the model. This model is in the graph learning field and has only one layer. The input will be node_feature [num_nodes, dim_feature] and edge_index [2, num_edges], and they should be loaded from the files 'node_features.pt' and 'edge_index.pt'. There is no edge attribute or edge weight as input. The model should detect the node_feature and edge_index shapes; if there is a Linear transformation layer in the model, its input and output shapes should be consistent. The in_channels is the dimension of the node features. Your code should contain an additional 'if __name__ == \"__main__\"' block, where you should load the node_feature and edge_index from the files, run the model, and save the output to a file 'llm_output.pt'. Implement the model forward function based on the following model formula information: 1. model name: {}, 2. model description: {}, 3. model formulation: {}, 4. model variables: {}. You must complete the forward function as far as you can.".format(
    formula_info["name"],
    formula_info["description"],
    formula_info["formulation"],
    formula_info["variables"],
)

resp = APIBackend(use_chat_cache=False).build_messages_and_create_chat_completion(
    user_prompt, system_prompt
)

print(resp)

# Take the code part from the response and save it to a file;
# the generated code is wrapped in a ```python ... ``` block.
code = resp.split("```python")[1].split("```")[0]
with open("llm_code.py", "w") as f:
    f.write(code)

average_shape_eval = []
average_value_eval = []
for test_mode in ["zeros", "ones", "randn"]:
    if test_mode == "zeros":
        node_feature = torch.zeros(1000, 128)
    elif test_mode == "ones":
        node_feature = torch.ones(1000, 128)
    elif test_mode == "randn":
        node_feature = torch.randn(1000, 128)
    edge_index = torch.randint(0, 1000, (2, 2000))

    torch.save(node_feature, "node_features.pt")
    torch.save(edge_index, "edge_index.pt")

    # os.system does not raise on a failing subprocess, so check its return code.
    if os.system("python llm_code.py") != 0:
        print("Error in running the LLM code")
    os.system("python gt_code.py")
    os.system("rm edge_index.pt")
    os.system("rm node_features.pt")

    # Load both outputs and evaluate the LLM code against the ground truth.
    from evaluator import shape_evaluator, value_evaluator

    try:
        llm_output = torch.load("llm_output.pt")
    except Exception:
        llm_output = None
    gt_output = torch.load("gt_output.pt")

    average_shape_eval.append(shape_evaluator(llm_output, gt_output)[1])
    average_value_eval.append(value_evaluator(llm_output, gt_output)[1])

    print("Shape evaluation: ", average_shape_eval[-1])
    print("Value evaluation: ", average_value_eval[-1])

os.system("rm llm_output.pt")
os.system("rm gt_output.pt")
os.system("rm llm_code.py")

print("Average shape evaluation: ", sum(average_shape_eval) / len(average_shape_eval))
print("Average value evaluation: ", sum(average_value_eval) / len(average_value_eval))
