diff --git a/examples/dna/dna_trainer.py b/examples/dna/dna_trainer.py
new file mode 100644
index 00000000..d08167b4
--- /dev/null
+++ b/examples/dna/dna_trainer.py
@@ -0,0 +1,144 @@
+import os
+# os.environ['CUDA_VISIBLE_DEVICES'] = '0'
+# os.environ['TL_BACKEND'] = 'torch'
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
+# 0:Output all; 1:Filter out INFO; 2:Filter out INFO and WARNING; 3:Filter out INFO, WARNING, and ERROR
+
+import argparse
+import tensorlayerx as tlx
+from gammagl.datasets import Planetoid
+from gammagl.utils import add_self_loops, mask_to_index
+from tensorlayerx.model import TrainOneStep, WithLoss
+from gammagl.models import DNAModel
+from sklearn.model_selection import StratifiedKFold
+import numpy as np
+
+
+class SemiSpvzLoss(WithLoss):
+    def __init__(self, net, loss_fn):
+        super(SemiSpvzLoss, self).__init__(backbone=net, loss_fn=loss_fn)
+
+    def forward(self, data, y):
+        logits = self.backbone_network(data['x'], data['edge_index'])
+        train_logits = tlx.gather(logits, data['train_idx'])
+        train_y = tlx.gather(data['y'], data['train_idx'])
+        loss = self._loss_fn(train_logits, train_y)
+        return loss
+
+
+def calculate_acc(logits, y, metrics):
+    """
+    Args:
+        logits: node logits
+        y: node labels
+        metrics: tensorlayerx.metrics
+
+    Returns:
+        rst
+    """
+
+    metrics.update(logits, y)
+    rst = metrics.result()
+    metrics.reset()
+    return rst
+
+
+def gen_uniform_20_20_60_split(data):
+    skf = StratifiedKFold(5, shuffle=True, random_state=55)
+    data.y = tlx.convert_to_numpy(data.y)
+    idx = [tlx.convert_to_tensor(i) for _, i in skf.split(data.y, data.y)]
+    data.train_idx = tlx.convert_to_tensor(idx[0], dtype=tlx.int64)
+    data.val_idx = tlx.convert_to_tensor(idx[1], dtype=tlx.int64)
+    data.test_idx = tlx.convert_to_tensor(tlx.concat(idx[2:], axis=0), dtype=tlx.int64)
+    data.y = tlx.convert_to_tensor(data.y)
+    return data
+
+
+def main(args):
+    # load datasets
+    if str.lower(args.dataset) not in ['cora', 'pubmed', 'citeseer']:
+        raise ValueError('Unknown dataset: {}'.format(args.dataset))
+    dataset = Planetoid(args.dataset_path, args.dataset)
+    graph = dataset[0]
+    graph = gen_uniform_20_20_60_split(graph)
+
+    net = DNAModel(in_channels=dataset.num_node_features,
+                   hidden_channels=args.hidden_dim,
+                   out_channels=dataset.num_classes,
+                   num_layers=args.num_layers,
+                   drop_rate_conv=args.drop_rate_conv,
+                   drop_rate_model=args.drop_rate_model,
+                   heads=args.heads,
+                   groups=args.groups,
+                   name="DNA")
+
+    optimizer = tlx.optimizers.Adam(lr=args.lr, weight_decay=args.l2_coef)
+    metrics = tlx.metrics.Accuracy()
+    train_weights = net.trainable_weights
+
+    loss_func = SemiSpvzLoss(net, tlx.losses.softmax_cross_entropy_with_logits)
+    train_one_step = TrainOneStep(loss_func, optimizer, train_weights)
+
+    data = {
+        "x": graph.x,
+        "y": graph.y,
+        "edge_index": graph.edge_index,
+        # "edge_weight": edge_weight,
+        "train_idx": graph.train_idx,
+        "test_idx": graph.test_idx,
+        "val_idx": graph.val_idx,
+        "num_nodes": graph.num_nodes,
+    }
+
+    best_val_acc = 0
+    for epoch in range(args.n_epoch):
+        net.set_train()
+        train_loss = train_one_step(data, graph.y)
+        net.set_eval()
+        logits = net(data['x'], data['edge_index'])
+        val_logits = tlx.gather(logits, data['val_idx'])
+        val_y = tlx.gather(data['y'], data['val_idx'])
+        val_acc = calculate_acc(val_logits, val_y, metrics)
+
+        print("Epoch [{:0>3d}] ".format(epoch + 1)
+              + " train loss: {:.4f}".format(train_loss.item())
+              + " val acc: {:.4f}".format(val_acc))
+
+        # save best model on evaluation set
+        if val_acc > best_val_acc:
+            best_val_acc = val_acc
+            net.save_weights(args.best_model_path+net.name+".npz", format='npz_dict')
+
+    net.load_weights(args.best_model_path+net.name+".npz", format='npz_dict')
+    net.set_eval()
+    logits = net(data['x'], data['edge_index'])
+    test_logits = tlx.gather(logits, data['test_idx'])
+    test_y = tlx.gather(data['y'], data['test_idx'])
+    test_acc = calculate_acc(test_logits, test_y, metrics)
+    print("Test acc: {:.4f}".format(test_acc))
+
+
+if __name__ == '__main__':
+    # parameters setting
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--lr", type=float, default=0.005, help="learning rate")
+    parser.add_argument("--n_epoch", type=int, default=200, help="number of epochs")
+    parser.add_argument("--hidden_dim", type=int, default=128, help="dimension of hidden layers")
+    parser.add_argument("--drop_rate_conv", type=float, default=0.8, help="dropout rate of attention coefficients in DNAConv")
+    parser.add_argument("--drop_rate_model", type=float, default=0.8, help="dropout rate applied in the model")
+    parser.add_argument("--num_layers", type=int, default=4, help="number of layers")
+    parser.add_argument("--heads", type=int, default=8, help="number of attention heads")
+    parser.add_argument("--groups", type=int, default=16, help="number of groups")
+    parser.add_argument("--l2_coef", type=float, default=5e-5, help="l2 loss coefficient")
+    parser.add_argument('--dataset', type=str, default='cora', help='dataset')
+    parser.add_argument("--dataset_path", type=str, default=r'', help="path to save dataset")
+    parser.add_argument("--best_model_path", type=str, default=r'./', help="path to save best model")
+    parser.add_argument("--self_loops", type=int, default=1, help="number of graph self-loops")
+    parser.add_argument("--gpu", type=int, default=6)
+
+    args = parser.parse_args()
+    if args.gpu >= 0:
+        tlx.set_device("GPU", args.gpu)
+    else:
+        tlx.set_device("CPU")
+
+    main(args)
diff --git a/examples/dna/readme.md b/examples/dna/readme.md
new file mode 100644
index 00000000..e672b552
--- /dev/null
+++ b/examples/dna/readme.md
@@ -0,0 +1,45 @@
+# JUST JUMP: DYNAMIC NEIGHBORHOOD AGGREGATION IN GRAPH NEURAL NETWORKS (DNA)
+
+- Paper link: [https://arxiv.org/abs/1904.04849](https://arxiv.org/abs/1904.04849)
+- PyG implementation: [https://github.com/pyg-team/pytorch_geometric/blob/master/examples/dna.py](https://github.com/pyg-team/pytorch_geometric/blob/master/examples/dna.py)
+
+# Dataset Statistics
+
+| Dataset  | # Nodes | # Edges | # Classes |
+|----------|---------|---------|-----------|
+| Cora     | 2,708   | 10,556  | 7         |
+| Citeseer | 3,327   | 9,228   | 6         |
+| Pubmed   | 19,717  | 88,651  | 3         |
+
+Refer to [Planetoid](https://gammagl.readthedocs.io/en/latest/api/gammagl.datasets.html#gammagl.datasets.Planetoid).
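+
+The snippet below is a minimal usage sketch (it is not part of the benchmark scripts): it builds the `DNAModel` added in this PR on Cora with the `groups=16` configuration from the commands below and runs one forward pass. The empty dataset path mirrors the default of `dna_trainer.py`.
+
+```python
+import tensorlayerx as tlx
+from gammagl.datasets import Planetoid
+from gammagl.models import DNAModel
+
+# Load Cora with the trainer's default (empty) dataset path.
+dataset = Planetoid('', 'cora')
+graph = dataset[0]
+
+# Hyperparameters follow the Cora / groups=16 command in the Results section.
+model = DNAModel(in_channels=dataset.num_node_features,
+                 hidden_channels=128,
+                 out_channels=dataset.num_classes,
+                 num_layers=4,
+                 drop_rate_conv=0.1,
+                 drop_rate_model=0.8,
+                 heads=8,
+                 groups=16,
+                 name="DNA")
+
+model.set_eval()
+log_probs = model(graph.x, graph.edge_index)   # [num_nodes, num_classes] log-probabilities
+pred = tlx.argmax(log_probs, axis=-1)          # predicted class per node
+```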
+
+Results
+-------
+
+```bash
+# available dataset: "cora", "citeseer", "pubmed"
+
+TL_BACKEND="torch" python dna_trainer.py --dataset cora --lr 0.01 --drop_rate_conv 0.2 --drop_rate_model 0.8 --num_layers 3 --heads 64 --groups 1 --l2_coef 5e-5 --hidden_dim 256
+TL_BACKEND="torch" python dna_trainer.py --dataset cora --lr 0.01 --drop_rate_conv 0.2 --drop_rate_model 0.9 --num_layers 3 --heads 32 --groups 8 --l2_coef 5e-5 --hidden_dim 256
+TL_BACKEND="torch" python dna_trainer.py --dataset cora --lr 0.01 --drop_rate_conv 0.1 --drop_rate_model 0.8 --num_layers 4 --heads 8 --groups 16 --l2_coef 5e-5 --hidden_dim 128
+TL_BACKEND="torch" python dna_trainer.py --dataset citeseer --lr 0.01 --drop_rate_conv 0.2 --drop_rate_model 0.8 --num_layers 3 --heads 32 --groups 1 --l2_coef 5e-5 --hidden_dim 128
+TL_BACKEND="torch" python dna_trainer.py --dataset citeseer --lr 0.01 --drop_rate_conv 0.1 --drop_rate_model 0.8 --num_layers 4 --heads 8 --groups 8 --l2_coef 5e-5 --hidden_dim 128
+TL_BACKEND="torch" python dna_trainer.py --dataset citeseer --lr 0.01 --drop_rate_conv 0.1 --drop_rate_model 0.8 --num_layers 4 --heads 8 --groups 16 --l2_coef 5e-5 --hidden_dim 128
+TL_BACKEND="torch" python dna_trainer.py --dataset pubmed --lr 0.01 --drop_rate_conv 0.1 --drop_rate_model 0.8 --num_layers 4 --heads 8 --groups 1 --l2_coef 5e-5 --hidden_dim 128
+TL_BACKEND="torch" python dna_trainer.py --dataset pubmed --lr 0.01 --drop_rate_conv 0.1 --drop_rate_model 0.8 --num_layers 4 --heads 8 --groups 8 --l2_coef 5e-5 --hidden_dim 128
+TL_BACKEND="torch" python dna_trainer.py --dataset pubmed --lr 0.01 --drop_rate_conv 0.1 --drop_rate_model 0.8 --num_layers 4 --heads 8 --groups 16 --l2_coef 5e-5 --hidden_dim 128
+```
+
+| Dataset  | group | Paper      | Our(th)    |
+| -------- | ----- | ---------- | ---------- |
+| cora     | 1     | 83.88±0.50 | 80.50±0.81 |
+| cora     | 8     | 85.86±0.45 | 81.22±0.18 |
+| cora     | 16    | 86.15±0.57 | 82.25±0.40 |
+| citeseer | 1     | 73.37±0.83 | 71.41±1.02 |
+| citeseer | 8     | 74.19±0.66 | 72.29±0.59 |
+| citeseer | 16    | 74.50±0.62 | 72.99±0.68 |
+| pubmed   | 1     | 87.80±0.25 | 87.32±0.52 |
+| pubmed   | 8     | 88.04±0.17 | 87.46±0.25 |
+| pubmed   | 16    | 88.04±0.22 | 87.49±0.11 |
+
+![image-20240703225836573](readme.assets/image-20240703225836573.png)
diff --git a/gammagl/layers/conv/__init__.py b/gammagl/layers/conv/__init__.py
index 0162a6cb..84c89697 100644
--- a/gammagl/layers/conv/__init__.py
+++ b/gammagl/layers/conv/__init__.py
@@ -33,6 +33,7 @@ from .magcl_conv import MAGCLConv
 from .fusedgat_conv import FusedGATConv
 from .hid_conv import Hid_conv
+from .dna_conv import DNAConv
 
 __all__ = [
     'MessagePassing',
     'GCNConv',
@@ -68,7 +69,8 @@
     'MAGCLConv',
     'FusedGATConv',
     'Hid_conv',
-    'HEATlayer'
+    'HEATlayer',
+    'DNAConv'
 ]
 
 classes = __all__
diff --git a/gammagl/layers/conv/dna_conv.py b/gammagl/layers/conv/dna_conv.py
new file mode 100644
index 00000000..968cd36c
--- /dev/null
+++ b/gammagl/layers/conv/dna_conv.py
@@ -0,0 +1,213 @@
+import tensorlayerx as tlx
+from gammagl.layers.conv import MessagePassing
+from gammagl.utils import calc_gcn_norm, add_self_loops
+from gammagl.utils.num_nodes import maybe_num_nodes
+import math
+import numpy as np
+
+
+class Linear(tlx.nn.Module):
+    def __init__(self, in_channels, out_channels, groups=1, bias=True):
+        super(Linear, self).__init__()
+        assert in_channels % groups == 0 and out_channels % groups == 0
+
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.groups = groups
+
+        shape = (groups, in_channels // groups, out_channels // groups)[::-1]
+        self.weight = self._get_weights('weights', shape=shape, init=tlx.initializers.he_uniform(a=math.sqrt(5)))
+
+        if bias:
+            initor = tlx.initializers.RandomUniform(0, 1)
+            self.bias = tlx.nn.Parameter(tlx.random_uniform(shape=(self.out_channels,)))
+        else:
+            self.bias = None
+
+    def forward(self, src):
+        if self.groups > 1:
+            src_shape = tlx.get_tensor_shape(src)[:-1]
+            src = tlx.reshape(src, (-1, self.groups, self.in_channels // self.groups))
+            src = tlx.transpose(src, (1, 0, 2))
+            out = tlx.matmul(src, self.weight)
+            out = tlx.transpose(out, (1, 0, 2))
+            out = tlx.reshape(out, tuple(src_shape) + (self.out_channels,))
+        else:
+            out = tlx.matmul(src, tlx.squeeze(self.weight, axis=0))
+
+        if self.bias is not None:
+            out += self.bias
+
+        return out
+
+
+def restricted_softmax(src, dim: int = -1, margin: float = 0.):
+    src_max = tlx.reduce_max(src, axis=dim, keepdims=True)
+    src_max = np.clip(tlx.convert_to_numpy(src_max), 0, None)
+    src_max = tlx.convert_to_tensor(src_max)
+    out = tlx.exp(src - src_max)
+    out = out / (tlx.reduce_sum(out, axis=dim, keepdims=True) + tlx.exp(margin - src_max))
+
+    return out
+
+
+class Attention(tlx.nn.Module):
+    def __init__(self, dropout=0):
+        super(Attention, self).__init__()
+        self.dropout = tlx.nn.Dropout(p=dropout)
+
+    def forward(self, query, key, value):
+        return self.compute_attention(query, key, value)
+
+    def compute_attention(self, query, key, value):
+        # query: [*, query_entries, dim_k]
+        # key: [*, key_entries, dim_k]
+        # value: [*, key_entries, dim_v]
+        # Output: [*, query_entries, dim_v]
+
+        assert query.ndim == key.ndim == value.ndim >= 2
+        assert query.shape[-1] == key.shape[-1]
+        assert key.shape[-2] == value.shape[-2]
+
+        score = tlx.matmul(query, tlx.ops.transpose(key, (0, 1, 3, 2)))
+        score = score / math.sqrt(key.shape[-1])
+        score = restricted_softmax(score, dim=-1)
+        score = self.dropout(score)
+
+        return tlx.matmul(score, value)
+
+
+class MultiHead(Attention):
+    def __init__(self, in_channels, out_channels, heads=1, groups=1, dropout=0, bias=True):
+        super().__init__(dropout=dropout)
+
+        self.in_channels = in_channels
+        self.out_channels = out_channels
+        self.heads = heads
+        self.groups = groups
+        self.bias = bias
+
+        assert in_channels % heads == 0 and out_channels % heads == 0
+        assert in_channels % groups == 0 and out_channels % groups == 0
+        assert max(groups, heads) % min(groups, heads) == 0
+
+        self.lin_q = Linear(in_channels, out_channels, groups=groups, bias=bias)
+        self.lin_k = Linear(in_channels, out_channels, groups=groups, bias=bias)
+        self.lin_v = Linear(in_channels, out_channels, groups=groups, bias=bias)
+
+    def forward(self, query, key, value):
+        query = self.lin_q(query)
+        key = self.lin_k(key)
+        value = self.lin_v(value)
+
+        batch_size = tlx.get_tensor_shape(query)[:-2]
+
+        out_channels_per_head = self.out_channels // self.heads
+
+        query_size = tuple(batch_size) + (query.shape[-2], self.heads, out_channels_per_head)
+        query = tlx.ops.reshape(query, query_size)
+        query = tlx.ops.transpose(query, (0, 2, 1, 3))
+
+        key_size = tuple(batch_size) + (key.shape[-2], self.heads, out_channels_per_head)
+        key = tlx.ops.reshape(key, key_size)
+        key = tlx.ops.transpose(key, (0, 2, 1, 3))
+
+        value_size = tuple(batch_size) + (value.shape[-2], self.heads, out_channels_per_head)
+        value = tlx.ops.reshape(value, value_size)
+        value = tlx.ops.transpose(value, (0, 2, 1, 3))
+
+        out = self.compute_attention(query, key, value)
+
+        out = tlx.transpose(out, (0, 2, 1, 3))
+        out = tlx.reshape(out, tuple(batch_size) + (query.shape[-2], self.out_channels))
+
+        return out
+
+
+class DNAConv(MessagePassing):
+    r"""The dynamic neighborhood aggregation operator from the `"Just Jump:
+    Towards Dynamic Neighborhood Aggregation in Graph Neural Networks"
+    <https://arxiv.org/abs/1904.04849>`_ paper.
+
+    .. math::
+        \mathbf{x}_v^{(t)} = h_{\mathbf{\Theta}}^{(t)} \left( \mathbf{x}_{v
+        \leftarrow v}^{(t)}, \left\{ \mathbf{x}_{v \leftarrow w}^{(t)} : w \in
+        \mathcal{N}(v) \right\} \right)
+
+    based on (multi-head) dot-product attention
+
+    .. math::
+        \mathbf{x}_{v \leftarrow w}^{(t)} = \textrm{Attention} \left(
+        \mathbf{x}^{(t-1)}_v \, \mathbf{\Theta}_Q^{(t)}, [\mathbf{x}_w^{(1)},
+        \ldots, \mathbf{x}_w^{(t-1)}] \, \mathbf{\Theta}_K^{(t)}, \,
+        [\mathbf{x}_w^{(1)}, \ldots, \mathbf{x}_w^{(t-1)}] \,
+        \mathbf{\Theta}_V^{(t)} \right)
+
+    with :math:`\mathbf{\Theta}_Q^{(t)}, \mathbf{\Theta}_K^{(t)},
+    \mathbf{\Theta}_V^{(t)}` denoting (grouped) projection matrices for query,
+    key and value information, respectively.
+    :math:`h^{(t)}_{\mathbf{\Theta}}` is implemented as a non-trainable
+    version of :class:`gammagl.layers.conv.GCNConv`.
+
+    .. note::
+        In contrast to other layers, this operator expects node features as
+        shape :obj:`[num_nodes, num_layers, channels]`.
+
+    Parameters
+    ----------
+    channels: int
+        Size of each input/output sample.
+    heads: int, optional
+        Number of multi-head-attentions.
+        (default: :obj:`1`)
+    groups: int, optional
+        Number of groups to use for all linear projections.
+        (default: :obj:`1`)
+    dropout: float, optional
+        Dropout probability of attention coefficients.
+        (default: :obj:`0.`)
+    normalize: bool, optional
+        Whether to add self-loops and apply symmetric normalization.
+        (default: :obj:`True`)
+    add_self_loops: bool, optional
+        If set to :obj:`False`, will not add self-loops to the input graph.
+        (default: :obj:`True`)
+    bias: bool, optional
+        If set to :obj:`False`, the layer will not learn an additive bias.
+        (default: :obj:`True`)
+
+    Shapes:
+        - **input:**
+          node features :math:`(|\mathcal{V}|, L, F)` where :math:`L` is the
+          number of layers,
+          edge indices :math:`(2, |\mathcal{E}|)`
+        - **output:** node features :math:`(|\mathcal{V}|, F)`
+    """
+
+    def __init__(self, channels: int, heads: int = 1, groups: int = 1,
+                 dropout: float = 0., normalize: bool = True, add_self_loops: bool = True,
+                 bias: bool = True):
+        super().__init__()
+
+        self.bias = bias
+        self.normalize = normalize
+        self.add_self_loops = add_self_loops
+
+        self.multi_head = MultiHead(channels, channels, heads, groups, dropout, bias)
+
+    def forward(self, x, edge_index, edge_weight=None):
+        if self.normalize and edge_weight is None:
+            edge_index, edge_weight = add_self_loops(edge_index)
+            edge_weight = calc_gcn_norm(edge_index=edge_index, num_nodes=maybe_num_nodes(edge_index), edge_weight=edge_weight)
+        else:
+            edge_weight = tlx.ones((edge_index.shape[1],))
+
+        return self.propagate(edge_index=edge_index, x=x, edge_weight=edge_weight)
+
+    def message(self, x, edge_index, edge_weight=None):
+        x_i = tlx.gather(x, edge_index[0, :])
+        x_j = tlx.gather(x, edge_index[1, :])
+
+        x_i = x_i[:, -1:]
+        out = self.multi_head(x_i, x_j, x_j)
+        return tlx.reshape(edge_weight, (-1, 1)) * tlx.squeeze(out, axis=1)
diff --git a/gammagl/models/__init__.py b/gammagl/models/__init__.py
index c08b85c0..9d564bfb 100644
--- a/gammagl/models/__init__.py
+++ b/gammagl/models/__init__.py
@@ -57,6 +57,7 @@ from .fusedgat import FusedGATModel
 from .hid_net import Hid_net
 from .gnnlfhf import GNNLFHFModel
+from .dna import DNAModel
 
 
 __all__ = [
     'HeCo',
@@ -117,7 +118,8 @@
     'FusedGATModel',
     'hid_net',
     'HEAT',
-    'GNNLFHFModel'
+    'GNNLFHFModel',
+    'DNAModel'
 ]
 
 classes = __all__
diff --git a/gammagl/models/dna.py b/gammagl/models/dna.py
new file mode 100644
index 00000000..448eed3d
--- /dev/null
+++ b/gammagl/models/dna.py
@@ -0,0 +1,32 @@
+import tensorlayerx as tlx
+from gammagl.layers.conv import DNAConv
+import math
+
+
+class DNAModel(tlx.nn.Module):
+    def __init__(self, in_channels, hidden_channels, out_channels, num_layers, drop_rate_conv=0.2, drop_rate_model=0.8,
+                 heads=1, groups=1, name=None):
+        super().__init__(name=name)
+        self.hidden_channels = hidden_channels
+        self.lin1 = tlx.nn.Linear(in_features=in_channels, out_features=hidden_channels, W_init=tlx.nn.he_uniform(a=math.sqrt(5)))
+        self.convs = tlx.nn.ModuleList()
+        for i in range(num_layers):
+            self.convs.append(
+                DNAConv(hidden_channels, heads, groups, dropout=drop_rate_conv))
+        self.lin2 = tlx.nn.Linear(in_features=hidden_channels, out_features=out_channels)
+        self.relu = tlx.nn.ReLU()
+        self.dropout = tlx.nn.Dropout(p=drop_rate_model)
+
+    def forward(self, x, edge_index):
+        x = self.relu(self.lin1(x))
+        x = self.dropout(x)
+        x_all = tlx.reshape(x, (-1, 1, self.hidden_channels))
+        for conv in self.convs:
+            x = self.relu(conv(x_all, edge_index))
+            x = tlx.reshape(x, (-1, 1, self.hidden_channels))
+            x_all = tlx.concat([x_all, x], axis=1)
+        x = x_all[:, -1]
+        x = self.dropout(x)
+        x = self.lin2(x)
+
+        return tlx.logsoftmax(x, dim=1)