-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
92d2d65
commit 467ed6e
Showing
13 changed files
with
429 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
[bumpversion] | ||
current_version = 0.1.0 | ||
commit = False | ||
tag = False | ||
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<dev>\d+))? | ||
serialize = | ||
{major}.{minor}.{patch}-{release}{dev} | ||
{major}.{minor}.{patch} | ||
|
||
[bumpversion:part:release] | ||
optional_value = _ | ||
first_value = dev | ||
values = | ||
dev | ||
_ | ||
|
||
[bumpversion:part:dev] | ||
|
||
[bumpversion:file:pyproject.toml] | ||
search = version = "{current_version}" | ||
replace = version = "{new_version}" | ||
|
||
[bumpversion:file:VERSION] | ||
|
||
[bumpversion:file:README.md] | ||
|
||
[bumpversion:file:plugin.json] | ||
|
||
[bumpversion:file:src/polus/mm/utils/load_trained_molgan_model/__init__.py] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
.venv | ||
out | ||
tests | ||
__pycache__ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
poetry.lock |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# CHANGELOG | ||
|
||
## 0.1.0 | ||
|
||
Initial release. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
# docker build -f Dockerfile -t polusai/molgan-tool:0.1.0 . | ||
FROM condaforge/mambaforge | ||
# NOT mambaforge-pypy3 (rdkit is incompatible with pypy) | ||
|
||
# RDKIT logging | ||
ENV RDKIT_ERROR_LOGGING="OFF" | ||
|
||
# Install wget and git (used below to clone MolGAN and download the model), then drop the apt package lists in the same layer to keep the image small. | ||
RUN apt-get update && apt-get install -y wget git && rm -rf /var/lib/apt/lists/* | ||
|
||
# Install Python 3.10 using Mamba | ||
RUN mamba install -y python=3.10 | ||
|
||
# Clone MolGAN | ||
RUN git clone https://github.com/ndonyapour/MolGAN.git | ||
|
||
# Build and install python bindings | ||
# MolGAN was initially implemented using TensorFlow v1, and TensorFlow version 2 offers support | ||
# for v1 functionalities. However, it's important to mention that the current patch for upgrading | ||
# to v2 does not truly migrate the v1 API to the v2 API; it calls the legacy v1 API from the v2 package via | ||
# "tf.compat.v1". Essentially, it is still the v1 API. A true upgrade to v2 requires rewriting most | ||
# functions of MolGAN, including model creation, data processing, and training. | ||
|
||
# Install the runtime dependencies; -y answers the confirmation prompt so the build never waits on stdin (same as the Python install step). | ||
RUN mamba install -y -c conda-forge rdkit "tensorflow<2.13" numpy scikit-learn xorg-libxrender | ||
|
||
# Make sure rdkit is activated | ||
RUN python -c "import rdkit" | ||
|
||
# Work from the cloned MolGAN repository (no training happens here; a pretrained model is downloaded below) | ||
WORKDIR /MolGAN | ||
|
||
# Download the gdb9 database | ||
RUN bash data/download_dataset.sh data/gdb9.sdf data/NP_score.pkl.gz data/SA_score.pkl.gz | ||
|
||
# Download the pretrained model | ||
RUN wget -nv --no-clobber https://huggingface.co/ndonyapour/MolGAN/resolve/main/MolGAN_model.tar.gz && tar xvzf MolGAN_model.tar.gz | ||
RUN mv MolGAN_model trained_models | ||
RUN wget -nv --no-clobber https://huggingface.co/ndonyapour/MolGAN/resolve/main/data.pkl -O data/data.pkl | ||
# Keep a copy of this Dockerfile inside the image; COPY is preferred over ADD for plain local files. | ||
COPY Dockerfile . |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# load_trained_molgan_model (0.1.0) | ||
|
||
MolGAN tool for generating small molecules | ||
|
||
## Options | ||
|
||
This plugin takes 8 input arguments and 2 output arguments: | ||
|
||
| Name | Description | I/O | Type | Default | | ||
|---------------|-------------------------|--------|--------|---------| | ||
| input_data_path | Path to the input data file, Type: string, File type: input, Accepted formats: pkl, Example file: https://github.com/bioexcel/biobb_ml/raw/master/biobb_ml/test/reference/classification/ref_output_model_support_vector_machine.pkl | Input | string | string | | ||
| input_NP_Score_path | Path to the NP score file, Type: string, File type: input, Accepted formats: gz, Example file: https://github.com/bioexcel/biobb_amber/raw/master/biobb_amber/test/data/cphstats/sander.ceout.gz | Input | string | string | | ||
| input_SA_Score_path | Path to the SA score file, Type: string, File type: input, Accepted formats: gz, Example file: https://github.com/bioexcel/biobb_amber/raw/master/biobb_amber/test/data/cphstats/sander.ceout.gz | Input | string | string | | ||
| input_model_dir | Input directory of trained models | Input | string | string | | ||
| output_log_path | Path to the log file, Type: string, File type: output, Accepted formats: log | Input | string | string | | ||
| output_sdf_path | Path to the output file, Type: string, File type: output, Accepted formats: sdf | Input | string | string | | ||
| num_samples | The number of new molecules to generate, Type: int | Input | int | int | | ||
| rdkit_error_logging | Enable or disable RDKit error logging | Input | string | string | | ||
| output_log_path | Path to the log file | Output | File | File | | ||
| output_sdf_path | Path to the output file | Output | File | File | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
0.1.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
#!/bin/bash | ||
|
||
# Build the plugin image, tagged with the version stored in the VERSION file. | ||
# Fail fast: if VERSION cannot be read, stop instead of building with an empty tag. | ||
set -euo pipefail | ||
|
||
version=$(<VERSION) | ||
# Quote the image reference so the version is never word-split or glob-expanded. | ||
docker build . -t "polusai/load-trained-molgan-model-tool:${version}" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
specVersion: "0.1.0" | ||
name: load_trained_molgan_model | ||
version: 0.1.0 | ||
container: load-trained-molgan-model-plugin | ||
entrypoint: | ||
title: load_trained_molgan_model | ||
description: MolGAN tool for generating small molecules | ||
author: Data Scientist | ||
contact: [email protected] | ||
repository: | ||
documentation: | ||
citation: | ||
|
||
inputs: | ||
- name: input_data_path | ||
required: true | ||
description: Path to the input data file, Type string, File type input, Accepted formats pkl, Example file https//github.com/bioexcel/biobb_ml/raw/master/biobb_ml/test/reference/classification/ref_output_model_support_vector_machine.pkl | ||
type: string | ||
defaultValue: system.pkl | ||
format: | ||
uri: edam:format_3653 | ||
- name: input_NP_Score_path | ||
required: true | ||
description: Output ceout file (AMBER ceout), Type string, File type input, Accepted formats gz, Example file https//github.com/bioexcel/biobb_amber/raw/master/biobb_amber/test/data/cphstats/sander.ceout.gz | ||
type: string | ||
defaultValue: NP.gz | ||
format: | ||
uri: edam:format_3987 | ||
- name: input_SA_Score_path | ||
required: true | ||
description: Output ceout file (AMBER ceout), Type string, File type input, Accepted formats gz, Example file https//github.com/bioexcel/biobb_amber/raw/master/biobb_amber/test/data/cphstats/sander.ceout.gz | ||
type: string | ||
defaultValue: SA.gz | ||
format: | ||
uri: edam:format_3987 | ||
- name: input_model_dir | ||
required: true | ||
description: "Input directory of trained models" | ||
type: string | ||
defaultValue: output | ||
format: | ||
uri: edam:format_2330 | ||
- name: output_log_path | ||
required: true | ||
description: Path to the log file, Type string, File type output, Accepted formats log | ||
type: string | ||
defaultValue: system.log | ||
format: | ||
uri: edam:format_2330 | ||
- name: output_sdf_path | ||
required: true | ||
description: Path to the output file, Type string, File type output, Accepted formats sdf | ||
type: string | ||
defaultValue: system.sdf | ||
format: | ||
uri: edam:format_3814 | ||
- name: num_samples | ||
required: true | ||
description: The number of new molecules to generate, Type int | ||
type: int | ||
defaultValue: 1000 | ||
format: | ||
uri: edam:format_2330 | ||
- name: rdkit_error_logging | ||
required: true | ||
description: Enable or disable RDKit error logging | ||
type: string | ||
# Quoted on purpose: a bare ON is read as boolean true by YAML 1.1 parsers, but this input is a string. | ||
defaultValue: "ON" | ||
outputs: | ||
- name: output_log_path | ||
required: true | ||
description: Path to the log file | ||
type: File | ||
format: | ||
uri: edam:format_2330 | ||
- name: output_sdf_path | ||
required: true | ||
description: Path to the output file | ||
type: File | ||
format: | ||
uri: edam:format_3814 | ||
ui: | ||
- key: inputs.input_data_path | ||
title: "input_data_path: " | ||
description: "Path to the input data file, Type string, File type input, Accepted formats pkl, Example file https//github.com/bioexcel/biobb_ml/raw/master/biobb_ml/test/reference/classification/ref_output_model_support_vector_machine.pkl" | ||
type: string | ||
- key: inputs.input_NP_Score_path | ||
title: "input_NP_Score_path: " | ||
description: "Output ceout file (AMBER ceout), Type string, File type input, Accepted formats gz, Example file https//github.com/bioexcel/biobb_amber/raw/master/biobb_amber/test/data/cphstats/sander.ceout.gz" | ||
type: string | ||
- key: inputs.input_SA_Score_path | ||
title: "input_SA_Score_path: " | ||
description: "Output ceout file (AMBER ceout), Type string, File type input, Accepted formats gz, Example file https//github.com/bioexcel/biobb_amber/raw/master/biobb_amber/test/data/cphstats/sander.ceout.gz" | ||
type: string | ||
- key: inputs.input_model_dir | ||
title: "input_model_dir: " | ||
description: "Input directory of trained models" | ||
type: string | ||
- key: inputs.output_log_path | ||
title: "output_log_path: " | ||
description: "Path to the log file, Type string, File type output, Accepted formats log" | ||
type: string | ||
- key: inputs.output_sdf_path | ||
title: "output_sdf_path: " | ||
description: "Path to the output file, Type string, File type output, Accepted formats sdf" | ||
type: string | ||
- key: inputs.num_samples | ||
title: "num_samples: " | ||
description: "The number of new molecules to generate, Type int" | ||
type: int | ||
- key: inputs.rdkit_error_logging | ||
title: "rdkit_error_logging: " | ||
description: "Enable or disable RDKit error logging" | ||
type: string |
149 changes: 149 additions & 0 deletions
149
utils/load-trained-molgan-model-plugin/load_trained_molgan_model_0@[email protected]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
#!/usr/bin/env cwl-runner | ||
cwlVersion: v1.0 | ||
|
||
class: CommandLineTool | ||
|
||
label: MolGAN tool for generating small molecules | ||
|
||
baseCommand: ["python", "/MolGAN/run_trained_model.py"] | ||
|
||
hints: | ||
DockerRequirement: | ||
dockerPull: polusai/molgan-tool@sha256:e008e74170be12dcf50a936a417b8c330ccdebf7fe17abaa8fa2689dac210725 | ||
|
||
# Set environment variables for the tool, | ||
# See: https://www.commonwl.org/user_guide/topics/environment-variables.html | ||
requirements: | ||
EnvVarRequirement: | ||
envDef: | ||
RDKIT_ERROR_LOGGING: $(inputs.rdkit_error_logging) | ||
inputs: | ||
input_data_path: | ||
label: Path to the input data file | ||
doc: |- | ||
Path to the input data file | ||
Type: string | ||
File type: input | ||
Accepted formats: pkl | ||
Example file: https://github.com/bioexcel/biobb_ml/raw/master/biobb_ml/test/reference/classification/ref_output_model_support_vector_machine.pkl | ||
type: string | ||
format: edam:format_3653 | ||
inputBinding: | ||
prefix: --input_data_path | ||
default: system.pkl | ||
|
||
input_NP_Score_path: | ||
label: Output ceout file (AMBER ceout) | ||
doc: |- | ||
Output ceout file (AMBER ceout) | ||
Type: string | ||
File type: input | ||
Accepted formats: gz | ||
Example file: https://github.com/bioexcel/biobb_amber/raw/master/biobb_amber/test/data/cphstats/sander.ceout.gz | ||
type: string | ||
format: edam:format_3987 | ||
default: NP.gz | ||
inputBinding: | ||
prefix: --input_NP_Score_path | ||
|
||
input_SA_Score_path: | ||
label: Output ceout file (AMBER ceout) | ||
doc: |- | ||
Output ceout file (AMBER ceout) | ||
Type: string | ||
File type: input | ||
Accepted formats: gz | ||
Example file: https://github.com/bioexcel/biobb_amber/raw/master/biobb_amber/test/data/cphstats/sander.ceout.gz | ||
type: string | ||
format: edam:format_3987 | ||
default: SA.gz | ||
inputBinding: | ||
prefix: --input_SA_Score_path | ||
|
||
input_model_dir: | ||
label: Input directory of trained models | ||
doc: |- | ||
Input directory of trained models | ||
type: string | ||
format: edam:format_2330 # 'Textual format' | ||
inputBinding: | ||
prefix: --input_model_dir | ||
default: output | ||
|
||
output_log_path: | ||
label: Path to the log file | ||
doc: |- | ||
Path to the log file | ||
Type: string | ||
File type: output | ||
Accepted formats: log | ||
type: string | ||
format: edam:format_2330 | ||
inputBinding: | ||
prefix: --output_log_path | ||
default: system.log | ||
|
||
output_sdf_path: | ||
label: Path to the output file | ||
doc: |- | ||
Path to the output file | ||
Type: string | ||
File type: output | ||
Accepted formats: sdf | ||
type: string | ||
format: edam:format_3814 # sdf | ||
default: system.sdf | ||
inputBinding: | ||
prefix: --output_sdf_path | ||
|
||
num_samples: | ||
label: The number of new molecules to generate | ||
doc: |- | ||
The number of new molecules to generate | ||
Type: int | ||
type: int? | ||
format: edam:format_2330 | ||
inputBinding: | ||
position: 7 | ||
prefix: --num_samples | ||
default: 1000 | ||
|
||
rdkit_error_logging: | ||
label: Enable or disable RDKit error logging | ||
doc: |- | ||
Enable or disable RDKit error logging | ||
type: string? | ||
# RDKit prints out all errors by default, which can pose issues for CI, | ||
# particularly with large databases. It would be more efficient to suppress these errors. | ||
default: "ON" | ||
outputs: | ||
output_log_path: | ||
label: Path to the log file | ||
doc: |- | ||
Path to the log file | ||
type: File | ||
outputBinding: | ||
glob: $(inputs.output_log_path) | ||
format: edam:format_2330 | ||
|
||
output_sdf_path: | ||
label: Path to the output file | ||
doc: |- | ||
Path to the output file | ||
type: File | ||
outputBinding: | ||
glob: $(inputs.output_sdf_path) | ||
format: edam:format_3814 # sdf | ||
|
||
stderr: | ||
type: File | ||
outputBinding: | ||
glob: stderr | ||
|
||
stderr: stderr | ||
|
||
$namespaces: | ||
edam: https://edamontology.org/ | ||
|
||
$schemas: | ||
- https://raw.githubusercontent.com/edamontology/edamontology/master/EDAM_dev.owl |
Oops, something went wrong.