From 466c60aabe25ddf4e9bf944047f58a3fe66bef53 Mon Sep 17 00:00:00 2001 From: Niklas Rindtorff Date: Sun, 11 Sep 2022 17:44:45 +0200 Subject: [PATCH] main.py --- Dockerfile | 15 +++-- cog.yaml | 42 ------------- main.py | 110 ++++++++++++++++++++++++++++++++++ predict.py | 115 ------------------------------------ requirements.txt | 113 +++++++---------------------------- requirements_docker.txt | 21 ------- test/{test.txt => test.sdf} | 0 7 files changed, 137 insertions(+), 279 deletions(-) delete mode 100644 cog.yaml create mode 100644 main.py delete mode 100644 predict.py delete mode 100644 requirements_docker.txt rename test/{test.txt => test.sdf} (100%) diff --git a/Dockerfile b/Dockerfile index 0ca65c7a..254f1786 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,18 +5,17 @@ RUN echo "downloading basic packages for installation" RUN apt-get update RUN apt-get install -y tmux wget curl nano less git -WORKDIR /home/ +WORKDIR /src/ -COPY requirements_docker.txt ./ +COPY requirements.txt ./ -RUN pip install --no-cache-dir -r requirements_docker.txt -RUN git clone https://github.com/NiklasTR/petri.git -RUN pip install petri/python +RUN pip install --no-cache-dir -r requirements.txt +# RUN git clone https://github.com/NiklasTR/petri.git +# RUN pip install petri/python COPY . . 
- # run a test -RUN python predict.py +# RUN python predict.py # CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"] -#CMD ["bash"] \ No newline at end of file +CMD ["bash"] \ No newline at end of file diff --git a/cog.yaml b/cog.yaml deleted file mode 100644 index c3190f2f..00000000 --- a/cog.yaml +++ /dev/null @@ -1,42 +0,0 @@ -# Configuration for Cog ⚙️ -# Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md - -build: - # set to true if your model requires a GPU - gpu: false - - # a list of ubuntu apt packages to install - system_packages: - - "tmux" - - "wget" - - "curl" - - "nano" - - "less" - - # python version in the form '3.8' or '3.8.12' - python_version: "3.8" - - # a list of packages in the format == - python_packages: - - "torch==1.12.1" - - "torchaudio==0.12.1" - - "rdkit==2022.3.5" - - "openbabel-wheel==3.1.1.5" - - "biopython==1.79" - - "biopandas==0.4.1" - - "pot==0.8.2" - - "dgl==0.9.0" - - "joblib==1.1.0" - - "pyaml==21.10.1" - - "icecream==2.1.3" - - "matplotlib==3.5.3" - - "tensorboard==2.10.0" - - "psutil==5.9.2" - - "dgllife==0.3.0" - - # commands run after the environment is setup - run: - - "echo env is ready!" - -# predict.py defines how predictions are run on your model -predict: "predict.py:Predictor" diff --git a/main.py b/main.py new file mode 100644 index 00000000..cd6c7599 --- /dev/null +++ b/main.py @@ -0,0 +1,110 @@ +import sys +import inference_VS_2 +import os +import yaml +import fastapi # a package we use to receive and return results via an API + +# define the API and a base message (module is imported as `fastapi`, so the class must be qualified) +app = fastapi.FastAPI() +@app.get("/") +def root(): + return "waiting for input" + +# below we define the key function used within a docker and +# TODO define API endpoint? 
+# @app.get("/{input}") +def predict( + protein: str, # a PDB protein structure file + small_molecule_library: str, # an SDF file containing >=2 small molecule ligands + ): + # custom changes + args = inference_VS_2.parse_arguments() + args = args[0] + args.inference_path = '/src/tmp' # copy the input files to this directory for renaming and processing + args.output_directory = '/outputs' + + # formatting input + protein = str(protein) + small_molecule_library = str(small_molecule_library) + + # isolate the argument path basenames + protein_base = os.path.basename(protein) + small_molecule_library_base = os.path.basename(small_molecule_library) + + # defining file name + protein_destination = args.inference_path + '/dummy/protein_' + protein_base + small_molecule_library_destination = args.inference_path + '/dummy/ligands_' + small_molecule_library_base + + # moving files from the paths defined in the arguments to the input directory for processing + os.system('mkdir -p ' + args.inference_path + '/dummy') + os.system('mv ' + protein + ' ' + protein_destination) + os.system('mv ' + small_molecule_library + ' ' + small_molecule_library_destination) + + # the dataurl go package does not like .sdf files, the input should be given in .txt - something to add to petri + #small_molecule_library_sdf = os.path.splitext(small_molecule_library_destination)[0]+'.sdf' + #print("converting library to sdf: " + small_molecule_library_sdf) + #os.system('mv ' + small_molecule_library_destination + ' ' + small_molecule_library_sdf) + + # adding missing args, only works for one run_dir + args.multi_ligand = True + # args.model_parameters['noise_initial'] = 0 + + # running the inference - this is the main function - lifted from __main__ in inference_VS_2.py + if args.config: + config_dict = yaml.load(args.config, Loader=yaml.FullLoader) + arg_dict = args.__dict__ + for key, value in config_dict.items(): + if isinstance(value, list): + for v in value: + arg_dict[key].append(v) + # 
dropping comparisson with CMD line arguments + #else: + # if key in cmdline_args: + # continue + # arg_dict[key] = value + args.config = args.config.name + else: + config_dict = {} + + + for run_dir in args.run_dirs: + args.checkpoint = f'runs/{run_dir}/best_checkpoint.pt' + config_dict['checkpoint'] = f'runs/{run_dir}/best_checkpoint.pt' + # overwrite args with args from checkpoint except for the args that were contained in the config file + arg_dict = args.__dict__ + with open(os.path.join(os.path.dirname(args.checkpoint), 'train_arguments.yaml'), 'r') as arg_file: + checkpoint_dict = yaml.load(arg_file, Loader=yaml.FullLoader) + for key, value in checkpoint_dict.items(): + if (key not in config_dict.keys()): + if isinstance(value, list): + for v in value: + arg_dict[key].append(v) + else: + arg_dict[key] = value + args.model_parameters['noise_initial'] = 0 + if args.inference_path == None: + inference_VS_2.inference(args) + elif args.multi_ligand == True: + print('Running Multi-Ligand') + #print(args) + inference_VS_2.multi_lig_inference(args) + else: + inference_VS_2.inference_from_files(args) + + # moving the output file to the output directory + ouput_name = os.listdir(args.output_directory + '/dummy')[0] + output_path_sdf = args.output_directory + '/dummy/' + ouput_name + print(output_path_sdf) + + # the dataurl go package does not like .sdf files, the input should be given in .txt - something to add to petri + # output_path = os.path.splitext(output_path_sdf)[0]+'.txt' + # print("converting library to txt: " + output_path) + # os.system('mv ' + output_path_sdf + ' ' + output_path) + + # output + # output = Path(output_path) + return(output_path_sdf) + +if __name__ == "__main__": + #sys.argv[1] + print(predict(sys.argv[1], sys.argv[2])) \ No newline at end of file diff --git a/predict.py b/predict.py deleted file mode 100644 index 53c8b95c..00000000 --- a/predict.py +++ /dev/null @@ -1,115 +0,0 @@ -# Prediction interface for Cog ⚙️ -# 
https://github.com/replicate/cog/blob/main/docs/python.md - -from cog import BasePredictor, Input, Path -import sys -# custom changes -import inference_VS_2 -import os -import yaml - - -class Predictor(BasePredictor): - def predict( - self, - protein: Path = Input(description="a PDB protein structure file"), - small_molecule_library: Path = Input(description="an SDF file containing >=2 small molecule ligands"), - ) -> Path: - # custom changes - args = inference_VS_2.parse_arguments() - args = args[0] - args.inference_path = '/src/tmp' - args.output_directory = '/src/out' - - # formatting input - protein = str(protein) - small_molecule_library = str(small_molecule_library) - - # isolate the argument path basenames - protein_base = os.path.basename(protein) - small_molecule_library_base = os.path.basename(small_molecule_library) - - # defining file name - protein_destination = args.inference_path + '/dummy/protein_' + protein_base - small_molecule_library_destination = args.inference_path + '/dummy/ligands_' + small_molecule_library_base - - # moving files from the paths defined in the arguments to the input directory for processing - os.system('mkdir -p ' + args.inference_path + '/dummy') - os.system('mv ' + protein + ' ' + protein_destination) - os.system('mv ' + small_molecule_library + ' ' + small_molecule_library_destination) - - # the dataurl go package does not like .sdf files, the input should be given in .txt - something to add to petri - small_molecule_library_sdf = os.path.splitext(small_molecule_library_destination)[0]+'.sdf' - print("converting library to sdf: " + small_molecule_library_sdf) - os.system('mv ' + small_molecule_library_destination + ' ' + small_molecule_library_sdf) - - # adding missings args, only works for one run_dir - args.multi_ligand = True - # args.model_parameters['noise_initial'] = 0 - - # running the inference - this is the main function - lifted from __main__ in inference_VS_2.py - if args.config: - config_dict = 
yaml.load(args.config, Loader=yaml.FullLoader) - arg_dict = args.__dict__ - for key, value in config_dict.items(): - if isinstance(value, list): - for v in value: - arg_dict[key].append(v) - # dropping comparisson with CMD line arguments - #else: - # if key in cmdline_args: - # continue - # arg_dict[key] = value - args.config = args.config.name - else: - config_dict = {} - - - for run_dir in args.run_dirs: - args.checkpoint = f'runs/{run_dir}/best_checkpoint.pt' - config_dict['checkpoint'] = f'runs/{run_dir}/best_checkpoint.pt' - # overwrite args with args from checkpoint except for the args that were contained in the config file - arg_dict = args.__dict__ - with open(os.path.join(os.path.dirname(args.checkpoint), 'train_arguments.yaml'), 'r') as arg_file: - checkpoint_dict = yaml.load(arg_file, Loader=yaml.FullLoader) - for key, value in checkpoint_dict.items(): - if (key not in config_dict.keys()): - if isinstance(value, list): - for v in value: - arg_dict[key].append(v) - else: - arg_dict[key] = value - args.model_parameters['noise_initial'] = 0 - if args.inference_path == None: - inference_VS_2.inference(args) - elif args.multi_ligand == True: - print('Running Multi-Ligand') - #print(args) - inference_VS_2.multi_lig_inference(args) - else: - inference_VS_2.inference_from_files(args) - - # moving the output file to the output directory - ouput_name = os.listdir(args.output_directory + '/dummy')[0] - output_path_sdf = args.output_directory + '/dummy/' + ouput_name - print(output_path_sdf) - - # the dataurl go package does not like .sdf files, the input should be given in .txt - something to add to petri - output_path = os.path.splitext(output_path_sdf)[0]+'.txt' - print("converting library to txt: " + output_path) - os.system('mv ' + output_path_sdf + ' ' + output_path) - - # output - output = Path(output_path) - return(output) - -if __name__ == '__main__': - p = Predictor() - p.predict(protein = '/src/test/test.pdb', small_molecule_library = 
'/src/test/test.txt') - - # print(sys.argv[0]) - # print(sys.argv[1]) - # p.predict(protein = sys.argv[0], small_molecule_library = sys.argv[1]) - - # '/src/test/test.pdb' - # '/src/test/test.sdf' \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 77318c66..f90795bf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,94 +1,21 @@ -absl-py -aiohttp -aiosignal -asttokens -async-timeout -asynctest==0.13.0 -attrs -biopandas -biopython -blinker==1.4 -brotlipy -cachetools -certifi==2022.6.15 -cffi -charset-normalizer -click -cloudpickle -colorama -cryptography -cycler -dgl-cu113 -dglgo -dgllife -executing -fonttools -frozenlist -future -google-auth -google-auth-oauthlib -greenlet -grpcio -hyperopt -icecream -idna -importlib-metadata -joblib -kiwisolver -Markdown -matplotlib -multidict -munkres==1.1.4 -networkx -numpy -oauthlib -packaging -pandas==1.3.5 -Pillow -ply==3.11 -POT -protobuf==3.19.4 -psutil -py4j -pyaml -pyasn1==0.4.8 -pyasn1-modules==0.2.7 -pycairo==1.21.0 -pycparser -Pygments -PyJWT -pyOpenSSL -pyparsing -PyQt5==5.15.7 -PyQt5-sip==12.11.0 -PySocks -python-dateutil -pytz -pyu2f -PyYAML -rdkit -reportlab==3.5.68 -requests -requests-oauthlib -rsa -scikit-learn -scipy -sip -six -SQLAlchemy -tensorboard -tensorboard-data-server -tensorboard-plugin-wit -threadpoolctl -toml -torch==1.12.0 +torch==1.12.1 +torchvision torchaudio -torchvision -tornado -tqdm -typing_extensions -unicodedata2 -urllib3 -Werkzeug -yarl -zipp +rdkit +openbabel-wheel==3.1.1.5 +biopython +rdkit +biopandas +pot +dgl +joblib +pyaml +icecream +matplotlib +tensorboard +psutil +dgllife + +fastapi>=0.68.0,<0.69.0 +pydantic>=1.8.0,<2.0.0 +uvicorn>=0.15.0,<0.16.0 \ No newline at end of file diff --git a/requirements_docker.txt b/requirements_docker.txt deleted file mode 100644 index c4edfe35..00000000 --- a/requirements_docker.txt +++ /dev/null @@ -1,21 +0,0 @@ -torch==1.12.1 -torchvision -torchaudio -rdkit -openbabel-wheel==3.1.1.5 -biopython -rdkit -biopandas 
-pot -dgl -joblib -pyaml -icecream -matplotlib -tensorboard -psutil -dgllife - -# fastapi>=0.68.0,<0.69.0 -# pydantic>=1.8.0,<2.0.0 -# uvicorn>=0.15.0,<0.16.0 \ No newline at end of file diff --git a/test/test.txt b/test/test.sdf similarity index 100% rename from test/test.txt rename to test/test.sdf