From b0dd3ab71fb3475a1eeff42258030aa35c6068a3 Mon Sep 17 00:00:00 2001
From: mufeili <mufeili1996@gmail.com>
Date: Fri, 22 Jan 2021 02:11:41 +0800
Subject: [PATCH 1/4] Update

---
 README.md               | 4 ++++
 examples/README.md      | 2 ++
 examples/fingerprint.py | 6 +++---
 examples/gnn.py         | 4 ++--
 examples/utils.py       | 6 +++++-
 5 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 389b5b7..52192b6 100644
--- a/README.md
+++ b/README.md
@@ -24,3 +24,7 @@
 | ---- | ------------- | -------------- | ---------------- | ------------------ | --------------------------------- | ---------------------------------------------------------------------------------------- | -------------- |
 | 1    | Random Forest | 1024-bit ECFP4 | 0.9540 +- 0.0038 | 0.9062 +- 0.0079   | [Mufei Li](mufeili1996@gmail.com) | [Paper](https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf), [Code](./examples) | Dec 30th, 2020 |
 | 2    | GCN           | GraphConv      | 0.9214 +- 0.0106 | 0.9445 +- 0.0049   | [Mufei Li](mufeili1996@gmail.com) | [Paper](https://arxiv.org/abs/1609.02907), [Code](./examples)                            | Dec 30th, 2020 |
+
+### ClinTox
+
+| Rank | Model         | Featurization  | Test ROC-AUC     | Validation ROC-AUC | Contact                           | References	                                                                           | Date           |
diff --git a/examples/README.md b/examples/README.md
index 8621cbf..f9c95d0 100644
--- a/examples/README.md
+++ b/examples/README.md
@@ -18,6 +18,7 @@ The feasible arguments include:
         - `BACE_classification`
         - `BACE_regression`
         - `BBBP`
+        - `ClinTox`
 - **Hyperparameter Search (optional)**: `-hs`
     - Perform a hyperparameter search using Bayesian optimization. It determines the best 
       hyperparameters based on the validation metric averaged across 3 runs.
@@ -40,6 +41,7 @@ The feasible arguments include:
         - `BACE_classification`
         - `BACE_regression`
         - `BBBP`
+        - `ClinTox`
 - **Hyperparameter Search (optional)**: `-hs`
     - Perform a hyperparameter search using Bayesian optimization. It determines the best 
       hyperparameters based on the validation metric averaged across 3 runs.
diff --git a/examples/fingerprint.py b/examples/fingerprint.py
index dd1fcd9..5fd7161 100644
--- a/examples/fingerprint.py
+++ b/examples/fingerprint.py
@@ -28,7 +28,7 @@ def rf_model_builder(model_dir, hyperparams, mode):
 
 
 def load_model(args, tasks, hyperparams):
-  if args['dataset'] in ['BACE_classification', 'BBBP']:
+  if args['dataset'] in ['BACE_classification', 'BBBP', 'ClinTox']:
     mode = 'classification'
   elif args['dataset'] in ['BACE_regression']:
     mode = 'regression'
@@ -100,7 +100,7 @@ def init_hyper_search_space(args):
         'min_samples_split': hp.choice('min_samples_split', [2, 4, 8, 16, 32]),
         'bootstrap': hp.choice('bootstrap', [True, False]),
     }
-    if args['dataset'] in ['BACE_classification', 'BBBP']:
+    if args['dataset'] in ['BACE_classification', 'BBBP', 'ClinTox']:
       search_space['criterion'] = hp.choice('criterion', ["gini", "entropy"])
     else:
       search_space['criterion'] = hp.choice('criterion', ["mse", "mae"])
@@ -154,7 +154,7 @@ def objective(hyperparams):
   parser.add_argument(
       '-d',
       '--dataset',
-      choices=['BACE_classification', 'BACE_regression', 'BBBP'],
+      choices=['BACE_classification', 'BACE_regression', 'BBBP', 'ClinTox'],
       help='Dataset to use')
   parser.add_argument(
       '-m',
diff --git a/examples/gnn.py b/examples/gnn.py
index 9aee517..a641913 100644
--- a/examples/gnn.py
+++ b/examples/gnn.py
@@ -11,7 +11,7 @@
 
 
 def load_model(save_pth, args, tasks, hyperparams):
-  if args['dataset'] in ['BACE_classification', 'BBBP']:
+  if args['dataset'] in ['BACE_classification', 'BBBP', 'ClinTox']:
     mode = 'classification'
     # binary classification
     n_classes = 2
@@ -177,7 +177,7 @@ def objective(hyperparams):
   parser.add_argument(
       '-d',
       '--dataset',
-      choices=['BACE_classification', 'BACE_regression', 'BBBP'],
+      choices=['BACE_classification', 'BACE_regression', 'BBBP', 'ClinTox'],
       help='Dataset to use')
   parser.add_argument(
       '-m',
diff --git a/examples/utils.py b/examples/utils.py
index b08a56f..fd4c5fb 100644
--- a/examples/utils.py
+++ b/examples/utils.py
@@ -4,7 +4,7 @@
 
 
 def decide_metric(dataset):
-  if dataset in ['BACE_classification', 'BBBP']:
+  if dataset in ['BACE_classification', 'BBBP', 'ClinTox']:
     return 'roc_auc'
   elif dataset == 'BACE_regression':
     return 'rmse'
@@ -75,6 +75,10 @@ def load_dataset(args):
     from deepchem.molnet import load_bace_regression
     tasks, all_dataset, transformers = load_bace_regression(
         featurizer=featurizer, splitter=splitter, reload=False)
+  elif args['dataset'] == 'ClinTox':
+    from deepchem.molnet import load_clintox
+    tasks, all_dataset, transformers = load_clintox(
+        featurizer=featurizer, splitter=splitter, reload=False)
   else:
     raise ValueError('Unexpected dataset: {}'.format(args['dataset']))
 

From 2cc0328ec7770071ec61e397a4f4c127c05dc859 Mon Sep 17 00:00:00 2001
From: mufeili <mufeili1996@gmail.com>
Date: Fri, 22 Jan 2021 03:42:07 +0800
Subject: [PATCH 2/4] Update

---
 README.md                                | 3 +++
 examples/configures/GCN_GC/ClinTox.json  | 8 ++++++++
 examples/configures/RF_ECFP/ClinTox.json | 6 ++++++
 3 files changed, 17 insertions(+)
 create mode 100644 examples/configures/GCN_GC/ClinTox.json
 create mode 100644 examples/configures/RF_ECFP/ClinTox.json

diff --git a/README.md b/README.md
index 52192b6..1afa93b 100644
--- a/README.md
+++ b/README.md
@@ -28,3 +28,6 @@
 ### ClinTox
 
 | Rank | Model         | Featurization  | Test ROC-AUC     | Validation ROC-AUC | Contact                           | References	                                                                           | Date           |
+| ---- | ------------- | -------------- | ---------------- | ------------------ | --------------------------------- | ---------------------------------------------------------------------------------------- | -------------- |
+| 1    | GCN           | GraphConv      | 0.9065 +- 0.0179 | 0.9880 +- 0.0073   | [Mufei Li](mufeili1996@gmail.com) | [Paper](https://arxiv.org/abs/1609.02907), [Code](./examples)                            | Jan 22nd, 2021 |
+| 2    | Random Forest | 1024-bit ECFP4 | 0.7829 +- 0.0235 | 0.8883 +- 0.0230   | [Mufei Li](mufeili1996@gmail.com) | [Paper](https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf), [Code](./examples) | Jan 22nd, 2021 |
diff --git a/examples/configures/GCN_GC/ClinTox.json b/examples/configures/GCN_GC/ClinTox.json
new file mode 100644
index 0000000..bab7803
--- /dev/null
+++ b/examples/configures/GCN_GC/ClinTox.json
@@ -0,0 +1,8 @@
+{
+  "batchnorm": true,
+  "dropout": 0.04333497108612183,
+  "hidden_feats": 128,
+  "lr": 0.15302291932022413,
+  "num_gnn_layers": 2,
+  "residual": true
+}
\ No newline at end of file
diff --git a/examples/configures/RF_ECFP/ClinTox.json b/examples/configures/RF_ECFP/ClinTox.json
new file mode 100644
index 0000000..7c9475c
--- /dev/null
+++ b/examples/configures/RF_ECFP/ClinTox.json
@@ -0,0 +1,6 @@
+{
+  "bootstrap": false,
+  "criterion": "entropy",
+  "min_samples_split": 16,
+  "n_estimators": 100
+}
\ No newline at end of file

From 8829c5cd4762c9b70449b07aeadb8629ce828637 Mon Sep 17 00:00:00 2001
From: mufeili <mufeili1996@gmail.com>
Date: Thu, 28 Jan 2021 17:49:21 +0800
Subject: [PATCH 3/4] Update

---
 README.md               | 7 +++++++
 examples/fingerprint.py | 4 ++--
 examples/gnn.py         | 4 ++--
 examples/utils.py       | 6 +++++-
 4 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 1afa93b..b017582 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,12 @@
 # MoleculeNet Leaderboard
 
+## Physical Chemistry
+
+### Delaney (ESOL)
+
+| Rank | Model         | Featurization  | Test ROC-AUC     | Validation ROC-AUC | Contact                           | References	                                                                           | Date           |
+| ---- | ------------- | -------------- | ---------------- | ------------------ | --------------------------------- | ---------------------------------------------------------------------------------------- | -------------- |
+
 ## Biophysics
 
 ### BACE Classification
diff --git a/examples/fingerprint.py b/examples/fingerprint.py
index 5fd7161..287b101 100644
--- a/examples/fingerprint.py
+++ b/examples/fingerprint.py
@@ -30,7 +30,7 @@ def rf_model_builder(model_dir, hyperparams, mode):
 def load_model(args, tasks, hyperparams):
   if args['dataset'] in ['BACE_classification', 'BBBP', 'ClinTox']:
     mode = 'classification'
-  elif args['dataset'] in ['BACE_regression']:
+  elif args['dataset'] in ['BACE_regression', 'Delaney']:
     mode = 'regression'
   else:
     raise ValueError('Unexpected dataset: {}'.format(args['dataset']))
@@ -154,7 +154,7 @@ def objective(hyperparams):
   parser.add_argument(
       '-d',
       '--dataset',
-      choices=['BACE_classification', 'BACE_regression', 'BBBP', 'ClinTox'],
+      choices=['BACE_classification', 'BACE_regression', 'BBBP', 'ClinTox', 'Delaney'],
       help='Dataset to use')
   parser.add_argument(
       '-m',
diff --git a/examples/gnn.py b/examples/gnn.py
index a641913..9c89fab 100644
--- a/examples/gnn.py
+++ b/examples/gnn.py
@@ -15,7 +15,7 @@ def load_model(save_pth, args, tasks, hyperparams):
     mode = 'classification'
     # binary classification
     n_classes = 2
-  elif args['dataset'] in ['BACE_regression']:
+  elif args['dataset'] in ['BACE_regression', 'Delaney']:
     mode = 'regression'
     n_classes = None
   else:
@@ -177,7 +177,7 @@ def objective(hyperparams):
   parser.add_argument(
       '-d',
       '--dataset',
-      choices=['BACE_classification', 'BACE_regression', 'BBBP', 'ClinTox'],
+      choices=['BACE_classification', 'BACE_regression', 'BBBP', 'ClinTox', 'Delaney'],
       help='Dataset to use')
   parser.add_argument(
       '-m',
diff --git a/examples/utils.py b/examples/utils.py
index fd4c5fb..f4002cc 100644
--- a/examples/utils.py
+++ b/examples/utils.py
@@ -6,7 +6,7 @@
 def decide_metric(dataset):
   if dataset in ['BACE_classification', 'BBBP', 'ClinTox']:
     return 'roc_auc'
-  elif dataset == 'BACE_regression':
+  elif dataset in ['BACE_regression', 'Delaney']:
     return 'rmse'
   else:
     return ValueError('Unexpected dataset: {}'.format(dataset))
@@ -79,6 +79,10 @@ def load_dataset(args):
     from deepchem.molnet import load_clintox
     tasks, all_dataset, transformers = load_clintox(
         featurizer=featurizer, splitter=splitter, reload=False)
+  elif args['dataset'] == 'Delaney':
+    from deepchem.molnet import load_delaney
+    tasks, all_dataset, transformers = load_delaney(
+        featurizer=featurizer, splitter=splitter, reload=False)
   else:
     raise ValueError('Unexpected dataset: {}'.format(args['dataset']))
 

From dd31878164f015ca67b624f708c94e11c553257f Mon Sep 17 00:00:00 2001
From: mufeili <mufeili1996@gmail.com>
Date: Thu, 28 Jan 2021 21:23:26 +0800
Subject: [PATCH 4/4] Update

---
 README.md                                | 6 ++++--
 examples/configures/GCN_GC/Delaney.json  | 8 ++++++++
 examples/configures/RF_ECFP/Delaney.json | 6 ++++++
 3 files changed, 18 insertions(+), 2 deletions(-)
 create mode 100644 examples/configures/GCN_GC/Delaney.json
 create mode 100644 examples/configures/RF_ECFP/Delaney.json

diff --git a/README.md b/README.md
index b017582..f3cbf22 100644
--- a/README.md
+++ b/README.md
@@ -4,8 +4,10 @@
 
 ### Delaney (ESOL)
 
-| Rank | Model         | Featurization  | Test ROC-AUC     | Validation ROC-AUC | Contact                           | References	                                                                           | Date           |
-| ---- | ------------- | -------------- | ---------------- | ------------------ | --------------------------------- | ---------------------------------------------------------------------------------------- | -------------- |
+| Rank | Model         | Featurization  | Test RMSE        | Validation RMSE  | Contact                           | References                                                                               | Date           |
+| ---- | ------------- | -------------- | ---------------- | ---------------- | --------------------------------- | ---------------------------------------------------------------------------------------- | -------------- |
+| 1    | GCN           | GraphConv      | 0.8851 +- 0.0292 | 0.9405 +- 0.0310 | [Mufei Li](mufeili1996@gmail.com) | [Paper](https://arxiv.org/abs/1609.02907), [Code](./examples)                            | Jan 28th, 2021 |
+| 2    | Random Forest | 1024-bit ECFP4 | 1.7406 +- 0.0261 | 1.7932 +- 0.0153 | [Mufei Li](mufeili1996@gmail.com) | [Paper](https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf), [Code](./examples) | Jan 28th, 2021 |
 
 ## Biophysics
 
diff --git a/examples/configures/GCN_GC/Delaney.json b/examples/configures/GCN_GC/Delaney.json
new file mode 100644
index 0000000..50f6bbc
--- /dev/null
+++ b/examples/configures/GCN_GC/Delaney.json
@@ -0,0 +1,8 @@
+{
+  "batchnorm": false,
+  "dropout": 0.01913243832609146,
+  "hidden_feats": 64,
+  "lr": 0.16034207807172668,
+  "num_gnn_layers": 2,
+  "residual": true
+}
diff --git a/examples/configures/RF_ECFP/Delaney.json b/examples/configures/RF_ECFP/Delaney.json
new file mode 100644
index 0000000..e02bf49
--- /dev/null
+++ b/examples/configures/RF_ECFP/Delaney.json
@@ -0,0 +1,6 @@
+{
+  "bootstrap": true,
+  "criterion": "mae",
+  "min_samples_split": 16,
+  "n_estimators": 30
+}