From b0dd3ab71fb3475a1eeff42258030aa35c6068a3 Mon Sep 17 00:00:00 2001 From: mufeili Date: Fri, 22 Jan 2021 02:11:41 +0800 Subject: [PATCH 1/4] Update --- README.md | 4 ++++ examples/README.md | 2 ++ examples/fingerprint.py | 6 +++--- examples/gnn.py | 4 ++-- examples/utils.py | 6 +++++- 5 files changed, 16 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 389b5b7..52192b6 100644 --- a/README.md +++ b/README.md @@ -24,3 +24,7 @@ | ---- | ------------- | -------------- | ---------------- | ------------------ | --------------------------------- | ---------------------------------------------------------------------------------------- | -------------- | | 1 | Random Forest | 1024-bit ECFP4 | 0.9540 +- 0.0038 | 0.9062 +- 0.0079 | [Mufei Li](mufeili1996@gmail.com) | [Paper](https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf), [Code](./examples) | Dec 30th, 2020 | | 2 | GCN | GraphConv | 0.9214 +- 0.0106 | 0.9445 +- 0.0049 | [Mufei Li](mufeili1996@gmail.com) | [Paper](https://arxiv.org/abs/1609.02907), [Code](./examples) | Dec 30th, 2020 | + +### ClinTox + +| Rank | Model | Featurization | Test ROC-AUC | Validation ROC-AUC | Contact | References | Date | diff --git a/examples/README.md b/examples/README.md index 8621cbf..f9c95d0 100644 --- a/examples/README.md +++ b/examples/README.md @@ -18,6 +18,7 @@ The feasible arguments include: - `BACE_classification` - `BACE_regression` - `BBBP` + - `ClinTox` - **Hyperparameter Search (optional)**: `-hs` - Perform a hyperparameter search using Bayesian optimization. It determines the best hyperparameters based on the validation metric averaged across 3 runs. @@ -40,6 +41,7 @@ The feasible arguments include: - `BACE_classification` - `BACE_regression` - `BBBP` + - `ClinTox` - **Hyperparameter Search (optional)**: `-hs` - Perform a hyperparameter search using Bayesian optimization. It determines the best hyperparameters based on the validation metric averaged across 3 runs. 
diff --git a/examples/fingerprint.py b/examples/fingerprint.py index dd1fcd9..5fd7161 100644 --- a/examples/fingerprint.py +++ b/examples/fingerprint.py @@ -28,7 +28,7 @@ def rf_model_builder(model_dir, hyperparams, mode): def load_model(args, tasks, hyperparams): - if args['dataset'] in ['BACE_classification', 'BBBP']: + if args['dataset'] in ['BACE_classification', 'BBBP', 'ClinTox']: mode = 'classification' elif args['dataset'] in ['BACE_regression']: mode = 'regression' @@ -100,7 +100,7 @@ def init_hyper_search_space(args): 'min_samples_split': hp.choice('min_samples_split', [2, 4, 8, 16, 32]), 'bootstrap': hp.choice('bootstrap', [True, False]), } - if args['dataset'] in ['BACE_classification', 'BBBP']: + if args['dataset'] in ['BACE_classification', 'BBBP', 'ClinTox']: search_space['criterion'] = hp.choice('criterion', ["gini", "entropy"]) else: search_space['criterion'] = hp.choice('criterion', ["mse", "mae"]) @@ -154,7 +154,7 @@ def objective(hyperparams): parser.add_argument( '-d', '--dataset', - choices=['BACE_classification', 'BACE_regression', 'BBBP'], + choices=['BACE_classification', 'BACE_regression', 'BBBP', 'ClinTox'], help='Dataset to use') parser.add_argument( '-m', diff --git a/examples/gnn.py b/examples/gnn.py index 9aee517..a641913 100644 --- a/examples/gnn.py +++ b/examples/gnn.py @@ -11,7 +11,7 @@ def load_model(save_pth, args, tasks, hyperparams): - if args['dataset'] in ['BACE_classification', 'BBBP']: + if args['dataset'] in ['BACE_classification', 'BBBP', 'ClinTox']: mode = 'classification' # binary classification n_classes = 2 @@ -177,7 +177,7 @@ def objective(hyperparams): parser.add_argument( '-d', '--dataset', - choices=['BACE_classification', 'BACE_regression', 'BBBP'], + choices=['BACE_classification', 'BACE_regression', 'BBBP', 'ClinTox'], help='Dataset to use') parser.add_argument( '-m', diff --git a/examples/utils.py b/examples/utils.py index b08a56f..fd4c5fb 100644 --- a/examples/utils.py +++ b/examples/utils.py @@ -4,7 +4,7 @@ 
def decide_metric(dataset): - if dataset in ['BACE_classification', 'BBBP']: + if dataset in ['BACE_classification', 'BBBP', 'ClinTox']: return 'roc_auc' elif dataset == 'BACE_regression': return 'rmse' @@ -75,6 +75,10 @@ def load_dataset(args): from deepchem.molnet import load_bace_regression tasks, all_dataset, transformers = load_bace_regression( featurizer=featurizer, splitter=splitter, reload=False) + elif args['dataset'] == 'ClinTox': + from deepchem.molnet import load_clintox + tasks, all_dataset, transformers = load_clintox( + featurizer=featurizer, splitter=splitter, reload=False) else: raise ValueError('Unexpected dataset: {}'.format(args['dataset'])) From 2cc0328ec7770071ec61e397a4f4c127c05dc859 Mon Sep 17 00:00:00 2001 From: mufeili Date: Fri, 22 Jan 2021 03:42:07 +0800 Subject: [PATCH 2/4] Update --- README.md | 3 +++ examples/configures/GCN_GC/ClinTox.json | 8 ++++++++ examples/configures/RF_ECFP/ClinTox.json | 6 ++++++ 3 files changed, 17 insertions(+) create mode 100644 examples/configures/GCN_GC/ClinTox.json create mode 100644 examples/configures/RF_ECFP/ClinTox.json diff --git a/README.md b/README.md index 52192b6..1afa93b 100644 --- a/README.md +++ b/README.md @@ -28,3 +28,6 @@ ### ClinTox | Rank | Model | Featurization | Test ROC-AUC | Validation ROC-AUC | Contact | References | Date | +| ---- | ------------- | -------------- | ---------------- | ------------------ | --------------------------------- | ---------------------------------------------------------------------------------------- | -------------- | +| 1 | GCN | GraphConv | 0.9065 +- 0.0179 | 0.9880 +- 0.0073 | [Mufei Li](mufeili1996@gmail.com) | [Paper](https://arxiv.org/abs/1609.02907), [Code](./examples) | Jan 22nd, 2021 | +| 2 | Random Forest | 1024-bit ECFP4 | 0.7829 +- 0.0235 | 0.8883 +- 0.0230 | [Mufei Li](mufeili1996@gmail.com) | [Paper](https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf), [Code](./examples) | Jan 22nd, 2021 | diff --git 
a/examples/configures/GCN_GC/ClinTox.json b/examples/configures/GCN_GC/ClinTox.json new file mode 100644 index 0000000..bab7803 --- /dev/null +++ b/examples/configures/GCN_GC/ClinTox.json @@ -0,0 +1,8 @@ +{ + "batchnorm": true, + "dropout": 0.04333497108612183, + "hidden_feats": 128, + "lr": 0.15302291932022413, + "num_gnn_layers": 2, + "residual": true +} \ No newline at end of file diff --git a/examples/configures/RF_ECFP/ClinTox.json b/examples/configures/RF_ECFP/ClinTox.json new file mode 100644 index 0000000..7c9475c --- /dev/null +++ b/examples/configures/RF_ECFP/ClinTox.json @@ -0,0 +1,6 @@ +{ + "bootstrap": false, + "criterion": "entropy", + "min_samples_split": 16, + "n_estimators": 100 +} \ No newline at end of file From 8829c5cd4762c9b70449b07aeadb8629ce828637 Mon Sep 17 00:00:00 2001 From: mufeili Date: Thu, 28 Jan 2021 17:49:21 +0800 Subject: [PATCH 3/4] Update --- README.md | 7 +++++++ examples/fingerprint.py | 4 ++-- examples/gnn.py | 4 ++-- examples/utils.py | 6 +++++- 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 1afa93b..b017582 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,12 @@ # MoleculeNet Leaderboard +## Physical Chemistry + +### Delaney (ESOL) + +| Rank | Model | Featurization | Test ROC-AUC | Validation ROC-AUC | Contact | References | Date | +| ---- | ------------- | -------------- | ---------------- | ------------------ | --------------------------------- | ---------------------------------------------------------------------------------------- | -------------- | + ## Biophysics ### BACE Classification diff --git a/examples/fingerprint.py b/examples/fingerprint.py index 5fd7161..287b101 100644 --- a/examples/fingerprint.py +++ b/examples/fingerprint.py @@ -30,7 +30,7 @@ def rf_model_builder(model_dir, hyperparams, mode): def load_model(args, tasks, hyperparams): if args['dataset'] in ['BACE_classification', 'BBBP', 'ClinTox']: mode = 'classification' - elif args['dataset'] in 
['BACE_regression']: + elif args['dataset'] in ['BACE_regression', 'Delaney']: mode = 'regression' else: raise ValueError('Unexpected dataset: {}'.format(args['dataset'])) @@ -154,7 +154,7 @@ def objective(hyperparams): parser.add_argument( '-d', '--dataset', - choices=['BACE_classification', 'BACE_regression', 'BBBP', 'ClinTox'], + choices=['BACE_classification', 'BACE_regression', 'BBBP', 'ClinTox', 'Delaney'], help='Dataset to use') parser.add_argument( '-m', diff --git a/examples/gnn.py b/examples/gnn.py index a641913..9c89fab 100644 --- a/examples/gnn.py +++ b/examples/gnn.py @@ -15,7 +15,7 @@ def load_model(save_pth, args, tasks, hyperparams): mode = 'classification' # binary classification n_classes = 2 - elif args['dataset'] in ['BACE_regression']: + elif args['dataset'] in ['BACE_regression', 'Delaney']: mode = 'regression' n_classes = None else: @@ -177,7 +177,7 @@ def objective(hyperparams): parser.add_argument( '-d', '--dataset', - choices=['BACE_classification', 'BACE_regression', 'BBBP', 'ClinTox'], + choices=['BACE_classification', 'BACE_regression', 'BBBP', 'ClinTox', 'Delaney'], help='Dataset to use') parser.add_argument( '-m', diff --git a/examples/utils.py b/examples/utils.py index fd4c5fb..f4002cc 100644 --- a/examples/utils.py +++ b/examples/utils.py @@ -6,7 +6,7 @@ def decide_metric(dataset): if dataset in ['BACE_classification', 'BBBP', 'ClinTox']: return 'roc_auc' - elif dataset == 'BACE_regression': + elif dataset in ['BACE_regression', 'Delaney']: return 'rmse' else: return ValueError('Unexpected dataset: {}'.format(dataset)) @@ -79,6 +79,10 @@ def load_dataset(args): from deepchem.molnet import load_clintox tasks, all_dataset, transformers = load_clintox( featurizer=featurizer, splitter=splitter, reload=False) + elif args['dataset'] == 'Delaney': + from deepchem.molnet import load_delaney + tasks, all_dataset, transformers = load_delaney( + featurizer=featurizer, splitter=splitter, reload=False) else: raise ValueError('Unexpected dataset: 
{}'.format(args['dataset'])) From dd31878164f015ca67b624f708c94e11c553257f Mon Sep 17 00:00:00 2001 From: mufeili Date: Thu, 28 Jan 2021 21:23:26 +0800 Subject: [PATCH 4/4] Update --- README.md | 6 ++++-- examples/configures/GCN_GC/Delaney.json | 8 ++++++++ examples/configures/RF_ECFP/Delaney.json | 6 ++++++ 3 files changed, 18 insertions(+), 2 deletions(-) create mode 100644 examples/configures/GCN_GC/Delaney.json create mode 100644 examples/configures/RF_ECFP/Delaney.json diff --git a/README.md b/README.md index b017582..f3cbf22 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,10 @@ ### Delaney (ESOL) -| Rank | Model | Featurization | Test ROC-AUC | Validation ROC-AUC | Contact | References | Date | -| ---- | ------------- | -------------- | ---------------- | ------------------ | --------------------------------- | ---------------------------------------------------------------------------------------- | -------------- | +| Rank | Model | Featurization | Test RMSE | Validation RMSE | Contact | References | Date | +| ---- | ------------- | -------------- | ---------------- | ---------------- | --------------------------------- | ---------------------------------------------------------------------------------------- | -------------- | +| 1 | GCN | GraphConv | 0.8851 +- 0.0292 | 0.9405 +- 0.0310 | [Mufei Li](mufeili1996@gmail.com) | [Paper](https://arxiv.org/abs/1609.02907), [Code](./examples) | Jan 28th, 2021 | +| 2 | Random Forest | 1024-bit ECFP4 | 1.7406 +- 0.0261 | 1.7932 +- 0.0153 | [Mufei Li](mufeili1996@gmail.com) | [Paper](https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf), [Code](./examples) | Jan 28th, 2021 | ## Biophysics diff --git a/examples/configures/GCN_GC/Delaney.json b/examples/configures/GCN_GC/Delaney.json new file mode 100644 index 0000000..50f6bbc --- /dev/null +++ b/examples/configures/GCN_GC/Delaney.json @@ -0,0 +1,8 @@ +{ + "batchnorm": false, + "dropout": 0.01913243832609146, + "hidden_feats": 64, + "lr": 0.16034207807172668, 
+ "num_gnn_layers": 2, + "residual": true +} diff --git a/examples/configures/RF_ECFP/Delaney.json b/examples/configures/RF_ECFP/Delaney.json new file mode 100644 index 0000000..e02bf49 --- /dev/null +++ b/examples/configures/RF_ECFP/Delaney.json @@ -0,0 +1,6 @@ +{ + "bootstrap": true, + "criterion": "mae", + "min_samples_split": 16, + "n_estimators": 30 +}