Merge pull request #1 from deepchem/master

Update
deepchem · Jan 28, 2021 · 5d4ad07 · 5d4ad07
2 parents 9bc46bb + edfde30
commit 5d4ad07
Show file tree

Hide file tree

Showing 7 changed files with 33 additions and 6 deletions.
diff --git a/README.md b/README.md
@@ -24,3 +24,10 @@
 | ---- | ------------- | -------------- | ---------------- | ------------------ | --------------------------------- | ---------------------------------------------------------------------------------------- | -------------- |
 | 1    | Random Forest | 1024-bit ECFP4 | 0.9540 +- 0.0038 | 0.9062 +- 0.0079   | [Mufei Li]([email protected]) | [Paper](https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf), [Code](./examples) | Dec 30th, 2020 |
 | 2    | GCN           | GraphConv      | 0.9214 +- 0.0106 | 0.9445 +- 0.0049   | [Mufei Li]([email protected]) | [Paper](https://arxiv.org/abs/1609.02907), [Code](./examples)                            | Dec 30th, 2020 |
+
+### ClinTox
+
+| Rank | Model         | Featurization  | Test ROC-AUC     | Validation ROC-AUC | Contact                           | References                                                                               | Date           |
+| ---- | ------------- | -------------- | ---------------- | ------------------ | --------------------------------- | ---------------------------------------------------------------------------------------- | -------------- |
+| 1    | GCN           | GraphConv      | 0.9065 +- 0.0179 | 0.9880 +- 0.0073   | [Mufei Li]([email protected]) | [Paper](https://arxiv.org/abs/1609.02907), [Code](./examples)                            | Jan 22nd, 2021 |
+| 2    | Random Forest | 1024-bit ECFP4 | 0.7829 +- 0.0235 | 0.8883 +- 0.0230   | [Mufei Li]([email protected]) | [Paper](https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf), [Code](./examples) | Jan 22nd, 2021 |
diff --git a/examples/README.md b/examples/README.md
@@ -18,6 +18,7 @@ The feasible arguments include:
         - `BACE_classification`
         - `BACE_regression`
         - `BBBP`
+        - `ClinTox`
 - **Hyperparameter Search (optional)**: `-hs`
     - Perform a hyperparameter search using Bayesian optimization. It determines the best 
       hyperparameters based on the validation metric averaged across 3 runs.
@@ -40,6 +41,7 @@ The feasible arguments include:
         - `BACE_classification`
         - `BACE_regression`
         - `BBBP`
+        - `ClinTox`
 - **Hyperparameter Search (optional)**: `-hs`
     - Perform a hyperparameter search using Bayesian optimization. It determines the best 
       hyperparameters based on the validation metric averaged across 3 runs.

diff --git a/examples/configures/GCN_GC/ClinTox.json b/examples/configures/GCN_GC/ClinTox.json
@@ -0,0 +1,8 @@
+{
+  "batchnorm": true,
+  "dropout": 0.04333497108612183,
+  "hidden_feats": 128,
+  "lr": 0.15302291932022413,
+  "num_gnn_layers": 2,
+  "residual": true
+}
diff --git a/examples/configures/RF_ECFP/ClinTox.json b/examples/configures/RF_ECFP/ClinTox.json
@@ -0,0 +1,6 @@
+{
+  "bootstrap": false,
+  "criterion": "entropy",
+  "min_samples_split": 16,
+  "n_estimators": 100
+}
diff --git a/examples/fingerprint.py b/examples/fingerprint.py
@@ -28,7 +28,7 @@ def rf_model_builder(model_dir, hyperparams, mode):
 
 
 def load_model(args, tasks, hyperparams):
-  if args['dataset'] in ['BACE_classification', 'BBBP']:
+  if args['dataset'] in ['BACE_classification', 'BBBP', 'ClinTox']:
     mode = 'classification'
   elif args['dataset'] in ['BACE_regression']:
     mode = 'regression'
@@ -100,7 +100,7 @@ def init_hyper_search_space(args):
         'min_samples_split': hp.choice('min_samples_split', [2, 4, 8, 16, 32]),
         'bootstrap': hp.choice('bootstrap', [True, False]),
     }
-    if args['dataset'] in ['BACE_classification', 'BBBP']:
+    if args['dataset'] in ['BACE_classification', 'BBBP', 'ClinTox']:
       search_space['criterion'] = hp.choice('criterion', ["gini", "entropy"])
     else:
       search_space['criterion'] = hp.choice('criterion', ["mse", "mae"])
@@ -154,7 +154,7 @@ def objective(hyperparams):
   parser.add_argument(
       '-d',
       '--dataset',
-      choices=['BACE_classification', 'BACE_regression', 'BBBP'],
+      choices=['BACE_classification', 'BACE_regression', 'BBBP', 'ClinTox'],
       help='Dataset to use')
   parser.add_argument(
       '-m',

diff --git a/examples/gnn.py b/examples/gnn.py
@@ -11,7 +11,7 @@
 
 
 def load_model(save_pth, args, tasks, hyperparams):
-  if args['dataset'] in ['BACE_classification', 'BBBP']:
+  if args['dataset'] in ['BACE_classification', 'BBBP', 'ClinTox']:
     mode = 'classification'
     # binary classification
     n_classes = 2
@@ -177,7 +177,7 @@ def objective(hyperparams):
   parser.add_argument(
       '-d',
       '--dataset',
-      choices=['BACE_classification', 'BACE_regression', 'BBBP'],
+      choices=['BACE_classification', 'BACE_regression', 'BBBP', 'ClinTox'],
       help='Dataset to use')
   parser.add_argument(
       '-m',

diff --git a/examples/utils.py b/examples/utils.py
@@ -4,7 +4,7 @@
 
 
 def decide_metric(dataset):
-  if dataset in ['BACE_classification', 'BBBP']:
+  if dataset in ['BACE_classification', 'BBBP', 'ClinTox']:
     return 'roc_auc'
   elif dataset == 'BACE_regression':
     return 'rmse'
@@ -75,6 +75,10 @@ def load_dataset(args):
     from deepchem.molnet import load_bace_regression
     tasks, all_dataset, transformers = load_bace_regression(
         featurizer=featurizer, splitter=splitter, reload=False)
+  elif args['dataset'] == 'ClinTox':
+    from deepchem.molnet import load_clintox
+    tasks, all_dataset, transformers = load_clintox(
+        featurizer=featurizer, splitter=splitter, reload=False)
   else:
     raise ValueError('Unexpected dataset: {}'.format(args['dataset']))