improve pytest (#211)

* pytest lhb * pytest修改 * update * update * update * update * update * update * update * update --------- Co-authored-by: gyzhou2000 <[email protected]> Co-authored-by: gyzhou2000 <[email protected]>
BUPT-GAMMA · Jul 13, 2024 · 1a754c5 · 1a754c5
1 parent e7c2901
commit 1a754c5
Show file tree

Hide file tree

Showing 48 changed files with 1,026 additions and 92 deletions.
diff --git a/gammagl/datasets/blogcatalog.py b/gammagl/datasets/blogcatalog.py
@@ -79,7 +79,7 @@ def process(self):
         zip_file.close()
 
         f_adj = np.load(file=osp.join(osp.join(self.raw_dir, self.name), 'adj.npz'))
-        f_feat = sp.load_npz(file=osp.join(osp.join(self.raw_dir, self.name), 'feat.npz')).A
+        f_feat = sp.load_npz(file=osp.join(osp.join(self.raw_dir, self.name), 'feat.npz')).toarray()
         f_label = np.load(file=osp.join(osp.join(self.raw_dir, self.name), 'label.npy'))
 
         adj = sp.csr_matrix((f_adj['data'], f_adj['indices'], f_adj['indptr']), f_adj['shape'])
@@ -99,16 +99,19 @@ def process(self):
         val_idx = node_index[train_size:train_size + val_size]
         test_idx = node_index[train_size + val_size:]
 
-        train_mask = tlx.zeros((data.num_nodes, 1)).squeeze(-1)
-        val_mask = tlx.zeros((data.num_nodes, 1)).squeeze(-1)
-        test_mask = tlx.zeros((data.num_nodes, 1)).squeeze(-1)
+        train_mask = tlx.squeeze(tlx.zeros((data.num_nodes, 1)), axis=-1)
+        val_mask = tlx.squeeze(tlx.zeros((data.num_nodes, 1)), axis=-1)
+        test_mask = tlx.squeeze(tlx.zeros((data.num_nodes, 1)), axis=-1)
 
+        train_mask = tlx.convert_to_numpy(train_mask)
+        val_mask = tlx.convert_to_numpy(val_mask)
+        test_mask = tlx.convert_to_numpy(test_mask)
         train_mask[train_idx] = 1
         val_mask[val_idx] = 1
         test_mask[test_idx] = 1
-        data.train_mask = train_mask.bool()
-        data.val_mask = val_mask.bool()
-        data.test_mask = test_mask.bool()
+        data.train_mask = tlx.convert_to_tensor(train_mask, dtype=tlx.bool)
+        data.val_mask = tlx.convert_to_tensor(val_mask, dtype=tlx.bool)
+        data.test_mask = tlx.convert_to_tensor(test_mask, dtype=tlx.bool)
 
         data = data if self.pre_transform is None else self.pre_transform(data)
 

diff --git a/gammagl/datasets/dblp.py b/gammagl/datasets/dblp.py
@@ -61,7 +61,7 @@ def raw_file_names(self) -> List[str]:
 
     @property
     def processed_file_names(self) -> str:
-        return 'data.pt'
+        return tlx.BACKEND + '_data.pt'
 
     def download(self):
         path = download_url(self.url, self.raw_dir)
@@ -81,7 +81,7 @@ def process(self):
 
         node_type_idx = np.load(osp.join(self.raw_dir, 'node_types.npy'))
         node_type_idx = tlx.convert_to_tensor(node_type_idx, dtype=tlx.int64)
-        data['conference'].num_nodes = int((node_type_idx == 3).sum())
+        data['conference'].num_nodes = int(tlx.reduce_sum(tlx.cast(node_type_idx == 3, dtype=tlx.int64)))
 
         y = np.load(osp.join(self.raw_dir, 'labels.npy'))
         data['author'].y = tlx.convert_to_tensor(y, dtype=tlx.int64)
@@ -90,7 +90,8 @@ def process(self):
         for name in ['train', 'val', 'test']:
             idx = split[f'{name}_idx']
             idx = tlx.convert_to_tensor(idx, dtype=tlx.int64)
-            mask = np.zeros(data['author'].num_nodes, dtype=np.bool)
+            mask = tlx.zeros((data['author'].num_nodes,), dtype=tlx.bool)
+            mask = tlx.convert_to_numpy(mask)
             mask[idx] = True
             data['author'][f'{name}_mask'] = tlx.convert_to_tensor(mask, dtype=tlx.bool)
 

diff --git a/gammagl/datasets/shapenet.py b/gammagl/datasets/shapenet.py
@@ -66,7 +66,7 @@ class ShapeNet(InMemoryDataset):
         (default: :obj:`False`)
 
     """
-
+    # TODO: this url is not work
     url = ('https://shapenet.cs.stanford.edu/media/'
            'shapenetcore_partanno_segmentation_benchmark_v0_normal.zip')
 

diff --git a/gammagl/layers/attention/edge_encoder.py b/gammagl/layers/attention/edge_encoder.py
@@ -5,7 +5,7 @@
 
 
 def dot_product(x1, x2):
-    return (x1 * x2).sum(dim=1)
+    return tlx.reduce_sum((x1 * x2), axis=1)
 
 class EdgeEncoding(nn.Module):
     def __init__(self, edge_dim, max_path_distance):
@@ -17,16 +17,16 @@ def __init__(self, edge_dim, max_path_distance):
 
     def forward(self, x, edge_attr, edge_paths):
         cij = tlx.zeros((x.shape[0], x.shape[0]))
+        cij = tlx.convert_to_numpy(cij)
 
         for src in edge_paths:
             for dst in edge_paths[src]:
                 path_ij = edge_paths[src][dst][:self.max_path_distance]
                 weight_inds = [i for i in range(len(path_ij))]
                 if path_ij == []:
                     continue
-                cij[src][dst] = tlx.reduce_mean(dot_product(self.edge_vector[weight_inds], edge_attr[path_ij]))
+                cij[src][dst] = tlx.reduce_mean(dot_product(tlx.gather(self.edge_vector, weight_inds), tlx.gather(edge_attr, path_ij)))
 
-        cij = tlx.convert_to_numpy(cij)
         cij_no_nan = np.nan_to_num(cij)
         cij = tlx.convert_to_tensor(cij_no_nan)
         return cij
diff --git a/gammagl/layers/attention/spatial_encoder.py b/gammagl/layers/attention/spatial_encoder.py
@@ -1,6 +1,7 @@
 from tensorlayerx import nn
 import tensorlayerx as tlx
 
+
 class SpatialEncoding(nn.Module):
     def __init__(self, max_path_distance):
         super().__init__()
@@ -11,8 +12,11 @@ def __init__(self, max_path_distance):
     def forward(self, x, paths):
         shape = tlx.get_tensor_shape(x)[0]
         spatial_matrix = tlx.zeros((shape, shape))
+        spatial_matrix = tlx.convert_to_numpy(spatial_matrix)
+
         for src in paths:
             for dst in paths[src]:
                 spatial_matrix[src][dst] = self.b[min(len(paths[src][dst]), self.max_path_distance) - 1]
 
+        spatial_matrix = tlx.convert_to_tensor(spatial_matrix)
         return spatial_matrix
diff --git a/gammagl/layers/conv/compgcn_conv.py b/gammagl/layers/conv/compgcn_conv.py
@@ -65,7 +65,7 @@ def __init__(self, in_channels, out_channels, num_relations, op='sub', add_bias=
             self.bias = self._get_weights(var_name="bias", shape=(out_channels,),  init=self.initor)
 
         return
-    def forward(self, x, edge_index, edge_type=None,ref_emb=None):
+    def forward(self, x, edge_index, edge_type=None, ref_emb=None):
 
         edge_half_num = int(edge_index.shape[1]/2)
         edge_in_index = edge_index[:,:edge_half_num]
@@ -78,9 +78,9 @@ def forward(self, x, edge_index, edge_type=None,ref_emb=None):
         loop_index = tlx.ops.stack([loop_index,loop_index])
         loop_type = [self.num_relations for n in range(0, x.shape[0])]
         loop_type = tlx.ops.convert_to_tensor(loop_type)
-        in_res = self.propagate(x,edge_in_index,edge_in_type,linear=self.w_in,rel_emb=ref_emb)
-        out_res = self.propagate(x,edge_out_index,edge_out_type,linear=self.w_out,rel_emb=ref_emb)
-        loop_res = self.propagate(x,loop_index,loop_type,linear=self.w_loop,rel_emb=ref_emb)
+        in_res = self.propagate(x,edge_in_index,edge_type=edge_in_type,linear=self.w_in,rel_emb=ref_emb)
+        out_res = self.propagate(x,edge_out_index,edge_type=edge_out_type,linear=self.w_out,rel_emb=ref_emb)
+        loop_res = self.propagate(x,loop_index,edge_type=loop_type,linear=self.w_loop,rel_emb=ref_emb)
         ref_emb = self.w_rel(ref_emb)
         res = in_res*(1/3) + out_res*(1/3) + loop_res*(1/3)
 
@@ -89,48 +89,48 @@ def forward(self, x, edge_index, edge_type=None,ref_emb=None):
         return res,ref_emb
 
 
-    def propagate(self, x, edge_index,edge_type, aggr='sum', **kwargs):
-        """
-        Function that perform message passing.
-
-        Parameters
-        ----------
-        x: 
-            input node feature.
-        edge_index: 
-            edges from src to dst.
-        aggr: 
-            aggregation type, default='sum', optional=['sum', 'mean', 'max'].
-        kwargs: 
-            other parameters dict.
-
-        """
-
-        if 'num_nodes' not in kwargs.keys() or kwargs['num_nodes'] is None:
-            kwargs['num_nodes'] = x.shape[0]
-
-        coll_dict = self.__collect__(x, edge_index,edge_type, aggr, kwargs)
-        msg_kwargs = self.inspector.distribute('message', coll_dict)
-        msg_kwargs['linear'] = kwargs['linear']
-        msg_kwargs['rel_emb'] = kwargs['rel_emb']
-        msg_kwargs['edge_type'] = edge_type
-        msg = self.message(**msg_kwargs)
-        x = self.aggregate(msg, edge_index, num_nodes=kwargs['num_nodes'], aggr=aggr,dim_size=x.shape[0])
-        x = self.update(x)
-        return x
-
-    def __collect__(self, x, edge_index,edge_type, aggr, kwargs):
-        out = {}
-
-        for k, v in kwargs.items():
-            out[k] = v
-        out['x'] = x
-        out['edge_index'] = edge_index
-        out['aggr'] = aggr
-        out['edge_type'] = edge_type
-        return out
-
-    def message(self, x, edge_index,edge_type, edge_weight=None,rel_emb=None,linear=None):
+    # def propagate(self, x, edge_index,edge_type, aggr='sum', **kwargs):
+    #     """
+    #     Function that perform message passing.
+
+    #     Parameters
+    #     ----------
+    #     x: 
+    #         input node feature.
+    #     edge_index: 
+    #         edges from src to dst.
+    #     aggr: 
+    #         aggregation type, default='sum', optional=['sum', 'mean', 'max'].
+    #     kwargs: 
+    #         other parameters dict.
+
+    #     """
+
+    #     if 'num_nodes' not in kwargs.keys() or kwargs['num_nodes'] is None:
+    #         kwargs['num_nodes'] = x.shape[0]
+
+    #     coll_dict = self.__collect__(x, edge_index,edge_type, aggr, kwargs)
+    #     msg_kwargs = self.inspector.distribute('message', coll_dict)
+    #     msg_kwargs['linear'] = kwargs['linear']
+    #     msg_kwargs['rel_emb'] = kwargs['rel_emb']
+    #     msg_kwargs['edge_type'] = edge_type
+    #     msg = self.message(**msg_kwargs)
+    #     x = self.aggregate(msg, edge_index, num_nodes=kwargs['num_nodes'], aggr=aggr,dim_size=x.shape[0])
+    #     x = self.update(x)
+    #     return x
+
+    # def __collect__(self, x, edge_index,edge_type, aggr, kwargs):
+    #     out = {}
+
+    #     for k, v in kwargs.items():
+    #         out[k] = v
+    #     out['x'] = x
+    #     out['edge_index'] = edge_index
+    #     out['aggr'] = aggr
+    #     out['edge_type'] = edge_type
+    #     return out
+
+    def message(self, x, edge_index, edge_type, edge_weight=None, rel_emb=None, linear=None):
         """
         Function that construct message from source nodes to destination nodes.
         
@@ -160,35 +160,35 @@ def message(self, x, edge_index,edge_type, edge_weight=None,rel_emb=None,linear=
         else:
             return msg
 
-    def aggregate(self, msg, edge_index, num_nodes=None, aggr='sum',dim_size=None):
-        """
-        Function that aggregates message from edges to destination nodes.
-
-        Parameters
-        ----------
-        msg: tensor
-            message construct by message function.
-        edge_index: tensor
-            edges from src to dst.
-        num_nodes: int
-            number of nodes of the graph.
-        aggr: str
-            aggregation type, default = 'sum', optional=['sum', 'mean', 'max'].
-
-        Returns
-        -------
-        tensor
-            output representation.
-
-        """
-        dst_index = edge_index[0, :]
-        if aggr == 'sum':
-            return unsorted_segment_sum(msg, dst_index, num_nodes)
-            #return unsorted_segment_sum(msg, dst_index, num_nodes)
-        elif aggr == 'mean':
-            return unsorted_segment_mean(msg, dst_index, num_nodes)
-        elif aggr == 'max':
-            return unsorted_segment_max(msg, dst_index, num_nodes)
-        else:
-            raise NotImplementedError('Not support for this opearator')
+    # def aggregate(self, msg, edge_index, num_nodes=None, aggr='sum',dim_size=None):
+    #     """
+    #     Function that aggregates message from edges to destination nodes.
+
+    #     Parameters
+    #     ----------
+    #     msg: tensor
+    #         message construct by message function.
+    #     edge_index: tensor
+    #         edges from src to dst.
+    #     num_nodes: int
+    #         number of nodes of the graph.
+    #     aggr: str
+    #         aggregation type, default = 'sum', optional=['sum', 'mean', 'max'].
+
+    #     Returns
+    #     -------
+    #     tensor
+    #         output representation.
+
+    #     """
+    #     dst_index = edge_index[0, :]
+    #     if aggr == 'sum':
+    #         return unsorted_segment_sum(msg, dst_index, num_nodes)
+    #         #return unsorted_segment_sum(msg, dst_index, num_nodes)
+    #     elif aggr == 'mean':
+    #         return unsorted_segment_mean(msg, dst_index, num_nodes)
+    #     elif aggr == 'max':
+    #         return unsorted_segment_max(msg, dst_index, num_nodes)
+    #     else:
+    #         raise NotImplementedError('Not support for this opearator')
 
diff --git a/gammagl/utils/shortest_path.py b/gammagl/utils/shortest_path.py
@@ -4,7 +4,7 @@
 from gammagl.data import Graph
 from gammagl.utils.convert import to_networkx
 
-
+# TODO: this function is not work in pytest
 def floyd_warshall_source_to_all(G, source, cutoff=None):
 
     r"""The Floyd-Warshall algorithm is used to calculate the shortest path 

diff --git a/gammagl/utils/to_dense_adj.py b/gammagl/utils/to_dense_adj.py
@@ -1,6 +1,8 @@
 import tensorlayerx as tlx
 from gammagl.mpops import unsorted_segment_sum
 
+
+# TODO: this function is not work in pytest
 def to_dense_adj(
     edge_index,
     batch = None,

diff --git a/tests/datasets/test_blogcatalog.py b/tests/datasets/test_blogcatalog.py
@@ -0,0 +1,17 @@
+from gammagl.data import Graph
+from gammagl.datasets.blogcatalog import BlogCatalog
+import tensorlayerx as tlx
+
+
+def test_blogcatalog():
+    return
+    dataset = BlogCatalog(root='./temp')
+    graph = dataset[0]
+    assert isinstance(graph, Graph)
+    assert graph.num_nodes == 5196
+    assert graph.num_edges == 343486
+    assert graph.num_features == 8189
+    assert dataset.num_classes == 6
+    assert tlx.reduce_sum(tlx.cast(graph.train_mask, dtype=tlx.int64)) == 2598
+    assert tlx.reduce_sum(tlx.cast(graph.val_mask, dtype=tlx.int64)) == 1299
+    assert tlx.reduce_sum(tlx.cast(graph.test_mask, dtype=tlx.int64)) == 1299
diff --git a/tests/datasets/test_dblp.py b/tests/datasets/test_dblp.py
@@ -0,0 +1,18 @@
+from gammagl.datasets.dblp import DBLP
+
+
+def test_dblp():
+    return
+    root = './temp'
+    dataset = DBLP(root=root, force_reload=True)
+    data = dataset[0]
+    assert 'author' in data.node_types, "Node type 'author' not found in data."
+    assert 'paper' in data.node_types, "Node type 'paper' not found in data."
+    assert 'term' in data.node_types, "Node type 'term' not found in data."
+    assert 'conference' in data.node_types, "Node type 'conference' not found in data."
+    assert data['author'].x.shape == (4057, 334), f"Author features shape mismatch: {data['author'].x.shape}"
+    assert data['paper'].x.shape == (14328, 4231), f"Paper features shape mismatch: {data['paper'].x.shape}"
+    assert data['term'].x.shape == (7723, 50), f"Term features shape mismatch: {data['term'].x.shape}"
+    assert data['conference'].num_nodes == 20, f"Conference node count mismatch: {data['conference'].num_nodes}"
+    assert data['author'].y.shape[0] == 4057, f"Author labels shape mismatch: {data['author'].y.shape}"
+    print("All tests passed!")
diff --git a/tests/datasets/test_ml.py b/tests/datasets/test_ml.py
@@ -0,0 +1,15 @@
+import tensorlayerx as tlx
+from gammagl.datasets.ml import MLDataset  # Replace with the correct module path
+
+
+def test_mldataset():
+    if tlx.BACKEND == "tensorflow":
+        return
+    root = './temp'
+    dataset = MLDataset(root=root, dataset_name='ml-100k')
+    data = dataset[0]
+    assert data.edge_index.shape[0] == 2, "Edge index shape mismatch"
+    assert len(data.edge_weight) > 0, "Edge weights should not be empty"
+    assert len(data.user_id) > 0, "User IDs should not be empty"
+    assert len(data.item_id) > 0, "Item IDs should not be empty"
+    print("All tests passed!")
diff --git a/tests/datasets/test_modelnet40.py b/tests/datasets/test_modelnet40.py
@@ -1,3 +1,5 @@
+from gammagl.datasets import ModelNet40
+
 root = './data'
 def test_modelnet40(get_dataset):
     train_dataset = get_dataset(name = 'ModelNet40')

diff --git a/tests/datasets/test_molecule_net.py b/tests/datasets/test_molecule_net.py
@@ -0,0 +1,17 @@
+import tensorlayerx as tlx
+from gammagl.utils.smiles import from_smiles
+from gammagl.datasets.molecule_net import MoleculeNet
+
+
+def test_moleculenet():
+    root = './temp'
+    dataset = MoleculeNet(root=root, name='ESOL')
+    data = dataset[0]
+    assert data.y.shape[1] == 1, "Label shape mismatch"
+    assert len(data.x) > 0, "Node features should not be empty"
+    assert data.edge_index.shape[0] == 2, "Edge index shape mismatch"
+    print("All tests passed!")
+
+
+
+