add M1.yml install file, fix typo in readme

snap-stanford · Mar 30, 2023 · edb969b · edb969b
1 parent 9c9c1ca
commit edb969b
Show file tree

Hide file tree

Showing 2 changed files with 213 additions and 0 deletions.
diff --git a/readme_roland.md b/readme_roland.md
@@ -0,0 +1,138 @@
+# Roland
+
+## Dataset Format
+
+The dataset should be prepared as a `tsv` file (`csv` is also supported with only a minor modification to the loader: add the keyword argument `sep=','` to the `dd.read_csv` call), with the first row as column names.
+
+Check the `TODO:` comments in the `roland_generic` loader to adapt it to your own dataset.
+
+The following columns are required to construct a basic dynamic graph.
+
+**Required Fields (Columns)**
+
+* `SRC_NODE` and `DST_NODE`: unique IDs of individuals. In BSI dataset, these two columns are named as `Payer` and `Payee`
+* `TIMESTAMP`: a Unix timestamp (integer, in seconds), e.g., `1230681600` denotes `2008-12-31T00:00:00` UTC (`2008-12-30T16:00:00` in US Pacific time). In BSI dataset, this column is named as `Timestamp`.
+* `AMOUNT`: transaction amount, this column is named as `AmountEUR` in BSI dataset.
+
+**Optional Fields (Columns)**
+
+* For node features, such as the country of company, add `SRC_NODECompany` and `DST_NODECompany` columns to the dataset. In BSI dataset, the company columns are `PayerCompany` and `PayeeCompany`.
+* For edge features associated with transactions, such as the currency used in this transaction, simply add `Currency` column to the dataset.
+
+## Example Dataset (BSI)
+
+* See `./GraphGym_dev/run/datasets/bsi_synthetic.tsv` for an example of BSI dataset.
+
+## Run Our Examples on BSI Dataset
+
+1. Since the BSI dataset is confidential, we generated a synthetic version of it for demonstration purposes. The synthetic sample has exactly the same format as the BSI dataset except that it is generated randomly, so you should **NOT** expect any algorithm to achieve any nontrivial out-of-sample accuracy on it. Feel free to modify the sample-generation script (`syn_bsi.py`) to change the size of the generated transaction graphs. **First, generate a synthetic BSI dataset**; one should already be located at `GraphGym_dev/run/datasets/bsi_synthetic.tsv`. You can regenerate the dataset using the following code.
+
+    ```bash
+    cd GraphGym_dev/run/datasets
+    python ./syn_bsi.py
+    ```
+
+2. Use the predefined YAML and RUN files, see `/GraphGym_dev/run/run_single_example.sh`.
+
+    Here the **ordinary recurrent GNN** denotes models based on homogeneous graphs (i.e., graphs without edge/node types). The **complete heterogeneous RGNN** contains separate networks for each message type `(sender_type, edge_type, receiver_type)`, so there are `NumNodeTypes*NumEdgeTypes*NumNodeTypes` internal GNNs. The **partial heterogeneous RGNN** only consists of `NumNodeTypes` node feature extractors and `NumEdgeTypes` edge feature extractors.
+
+    ```bash
+    cd ./GraphGym_dev/run/
+    # Ordinary recurrent GNN based on homogeneous graph.
+    python main.py --cfg configs/roland/examples/gnn_recurrent_example.yaml --repeat 1
+    # Complete heterogenous RGNN.
+    python main.py --cfg configs/roland/examples/complete_hete_example.yaml --repeat 1
+    # Partial heterogenous RGNN.
+    python main.py --cfg configs/roland/examples/partial_hete_example.yaml --repeat 1
+    ```
+
+## Run Models On Your Own Dataset
+
+To deploy existing models on your own datasets, you would need to:
+
+1. Make a copy of the generic loader at `GraphGym_dev/graphgym/contrib/loader/roland_generic.py`, and modify all `TODO` items in the Python file to make it compatible with your own dataset. (See the section below.)
+
+2. Create the corresponding YAML and RUN files:
+
+    ```bash
+    cd ./GraphGym_dev/run/
+    python main.py --cfg YOUR_CONFIG.yaml --repeat 1
+    ```
+
+## Modify `yaml` Configuration Files
+
+Here we provide a detailed example explaining how to modify a config yaml file. In most cases, you only need to modify a few lines to make it work on your own dataset. Here we only include the fields that need to be changed.
+
+```yaml
+out_dir: results
+device: auto  # {'cpu', 'gpu', 'auto'}
+dataset:
+  format: transaction_hetero_v1  # the format needs to be compatible with the loader.
+  name: bsi_synthetic.tsv  # file name of the transaction dataset.
+  is_hetero: True
+  dir: /lfs/hyperturing2/0/tianyudu/GraphGym_dev/run/datasets  # dataset directory.
+  task: link_pred
+  shuffle: True  # must be set to False if using time series data.
+  task_type: classification
+  transductive: True
+  split: [0.8, 0.1, 0.1]
+  augment_feature: []
+  augment_feature_dims: [0]
+  augment_feature_repr: position
+  augment_label: ''
+  augment_label_dims: 0
+  transform: none
+  edge_encoder: True
+  edge_encoder_name: roland
+  edge_encoder_bn: True
+  node_encoder: False
+  node_encoder_name: roland
+  node_encoder_bn: True
+transaction:
+  keep_ratio: linear
+  snapshot: True
+  snapshot_freq: M  # M=monthly, W=weekly, D=daily.
+  check_snapshot: False
+  history: rolling
+  horizon: 1
+  pred_mode: at
+  loss: supervised
+  feature_int_dim: 16  # number of categorical edge features.
+  # number of unique values for each categorical edge feature, for example, 1017 means the first categorical
+  # edge feature (which is PayerBank in BSI dataset) has 1017 unique values. This needs to be modified based on
+  # the dataset.
+  feature_edge_int_num: [1017, 1018, 33, 33, 13, 13, 23, 23, 86, 86, 5, 5, 9, 9, 1, 1]
+  feature_node_int_num: [1, 1]
+  feature_amount_dim: 16
+  feature_time_dim: 16
+train:
+  batch_size: 32
+  eval_period: 5
+  ckpt_period: 400
+  # See ./GraphGym_dev/graphgym/contrib/train/ for available options for `mode`.
+  mode: new_hetero  # which training module to use.
+model:
+  # See ./GraphGym_dev/graphgym/contrib/network/ for all options.
+  type: hetero_gnn_recurrent
+  loss_fun: cross_entropy
+  edge_decoding: concat  # Only use node embeddings.
+  graph_pooling: add
+gnn:
+  layers_pre_mp: 2  # number of fully-connected layers before GNN.
+  layers_mp: 2  # number of GNN layers.
+  layers_post_mp: 2  # number of fully-connected layers after GNN.
+  dim_inner: 128  # dimension of hidden layers in GNN.
+  # See ./GraphGym_dev/graphgym/contrib/layer/ for all options.
+  layer_type: generaledgeheteconv_complete
+  stage_type: stack
+  batchnorm: True
+  act: prelu
+  dropout: 0.0
+  agg: add
+  att_heads: 4
+  normalize_adj: False
+optim:
+  optimizer: adam
+  base_lr: 0.01
+  max_epoch: 100
+```
diff --git a/roland_environment_M1.yml b/roland_environment_M1.yml
@@ -0,0 +1,75 @@
+# yml file to install the ROLAND Conda environment on Apple Silicon MacBooks.
+# Also works to install the stable version of GraphGym, for a quick and easy Conda setup.
+name: roland
+channels:
+  - pyg
+  - pytorch
+  - conda-forge
+  - nvidia
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1=main
+  - absl-py=1.0.0=pyhd8ed1ab_0
+  - aiosignal=1.2.0=pyhd8ed1ab_0
+  - async-timeout=4.0.2=pyhd8ed1ab_0
+  - attrs=21.4.0=pyhd8ed1ab_0
+  - blas=1.0=mkl
+  - blinker=1.4=py_1
+  - cachetools=5.0.0=pyhd8ed1ab_0
+  - charset-normalizer=2.0.4=pyhd3eb1b0_0
+  - cloudpickle=2.0.0=pyhd3eb1b0_0
+  - cycler=0.11.0=pyhd3eb1b0_0
+  - dask-glm=0.2.0=py_1
+  - dask-ml=2022.5.27=pyhd8ed1ab_0
+  - fonttools=4.25.0=pyhd3eb1b0_0
+  - google-auth=2.6.6=pyh6c4a22f_0
+  - google-auth-oauthlib=0.4.6=pyhd8ed1ab_0
+  - heapdict=1.0.1=pyhd3eb1b0_0
+  - idna=3.3=pyhd3eb1b0_0
+  - jinja2=3.0.3=pyhd3eb1b0_0
+  - joblib=1.1.0=pyhd3eb1b0_0
+  - markdown=3.3.7=pyhd8ed1ab_0
+  - multipledispatch=0.6.0=py_0
+  - munkres=1.1.4=py_0
+  - networkx=2.7.1=pyhd3eb1b0_0
+  - oauthlib=3.2.0=pyhd8ed1ab_0
+  - packaging=21.3=pyhd3eb1b0_0
+  - partd=1.2.0=pyhd3eb1b0_1
+  - pyasn1=0.4.8=py_0
+  - pyasn1-modules=0.2.7=py_0
+  - pycparser=2.21=pyhd3eb1b0_0
+  - pyjwt=2.4.0=pyhd8ed1ab_0
+  - pyopenssl=22.0.0=pyhd3eb1b0_0
+  - pyparsing=3.0.4=pyhd3eb1b0_0
+  - python-dateutil=2.8.2=pyhd3eb1b0_0
+  - python-louvain=0.15=pyhd3eb1b0_0
+  - python_abi=3.9=2_cp39
+  - pytorch-mutex=1.0=cuda
+  - pytz=2021.3=pyhd3eb1b0_0
+  - pyu2f=0.1.5=pyhd8ed1ab_0
+  - requests=2.27.1=pyhd3eb1b0_0
+  - requests-oauthlib=1.3.1=pyhd8ed1ab_0
+  - rsa=4.8=pyhd8ed1ab_0
+  - seaborn=0.11.2=pyhd3eb1b0_0
+  - six=1.16.0=pyhd3eb1b0_1
+  - sortedcontainers=2.4.0=pyhd3eb1b0_0
+  - tblib=1.7.0=pyhd3eb1b0_0
+  - tensorboard=2.9.0=pyhd8ed1ab_0
+  - tensorboard-plugin-wit=1.8.1=pyhd8ed1ab_0
+  - threadpoolctl=2.2.0=pyh0d69192_0
+  - toolz=0.11.2=pyhd3eb1b0_0
+  - typing-extensions=4.1.1=hd3eb1b0_0
+  - typing_extensions=4.1.1=pyh06a4308_0
+  - tzdata=2022a=hda174b7_0
+  - werkzeug=2.1.2=pyhd8ed1ab_1
+  - wheel=0.37.1=pyhd3eb1b0_0
+  - yacs=0.1.6=pyhd3eb1b0_1
+  - zict=2.0.0=pyhd3eb1b0_0
+  - zipp=3.8.0=pyhd8ed1ab_0
+  - pip:
+    - littleutils==0.2.2
+    - ogb==1.3.3
+    - outdated==0.2.1
+    - protobuf==4.21.1
+    - tensorboardx==2.5
+prefix: /home/tianyudu/anaconda3/envs/rolanden