From 466c60aabe25ddf4e9bf944047f58a3fe66bef53 Mon Sep 17 00:00:00 2001
From: Niklas Rindtorff <NiklasTR@users.noreply.github.com>
Date: Sun, 11 Sep 2022 17:44:45 +0200
Subject: [PATCH] main.py

---
 Dockerfile                  |  15 +++--
 cog.yaml                    |  42 -------------
 main.py                     | 110 ++++++++++++++++++++++++++++++++++
 predict.py                  | 115 ------------------------------------
 requirements.txt            | 113 +++++++----------------------------
 requirements_docker.txt     |  21 -------
 test/{test.txt => test.sdf} |   0
 7 files changed, 137 insertions(+), 279 deletions(-)
 delete mode 100644 cog.yaml
 create mode 100644 main.py
 delete mode 100644 predict.py
 delete mode 100644 requirements_docker.txt
 rename test/{test.txt => test.sdf} (100%)
diff --git a/Dockerfile b/Dockerfile
index 0ca65c7a..254f1786 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -5,18 +5,17 @@ RUN echo "downloading basic packages for installation"
 RUN apt-get update
 RUN apt-get install -y tmux wget curl nano less git
 
-WORKDIR /home/
+WORKDIR /src/
 
-COPY requirements_docker.txt ./
+COPY requirements.txt ./
 
-RUN pip install --no-cache-dir -r requirements_docker.txt
-RUN git clone https://github.com/NiklasTR/petri.git
-RUN pip install petri/python
+RUN pip install --no-cache-dir -r requirements.txt
+# RUN git clone https://github.com/NiklasTR/petri.git
+# RUN pip install petri/python
 
 COPY . .
 
-
 # run a test
-RUN python predict.py
+# RUN python predict.py
 # CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"]
-#CMD ["bash"]
\ No newline at end of file
+CMD ["bash"]
\ No newline at end of file
diff --git a/cog.yaml b/cog.yaml
deleted file mode 100644
index c3190f2f..00000000
--- a/cog.yaml
+++ /dev/null
@@ -1,42 +0,0 @@
-# Configuration for Cog ⚙️
-# Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md
-
-build:
-  # set to true if your model requires a GPU
-  gpu: false
-
-  # a list of ubuntu apt packages to install
-  system_packages:
-    - "tmux"
-    - "wget"
-    - "curl"
-    - "nano"
-    - "less"
-
-  # python version in the form '3.8' or '3.8.12'
-  python_version: "3.8"
-
-  # a list of packages in the format <package-name>==<version>
-  python_packages:
-    - "torch==1.12.1"
-    - "torchaudio==0.12.1"
-    - "rdkit==2022.3.5"
-    - "openbabel-wheel==3.1.1.5"
-    - "biopython==1.79"
-    - "biopandas==0.4.1"
-    - "pot==0.8.2"
-    - "dgl==0.9.0"
-    - "joblib==1.1.0"
-    - "pyaml==21.10.1"
-    - "icecream==2.1.3"
-    - "matplotlib==3.5.3"
-    - "tensorboard==2.10.0"
-    - "psutil==5.9.2"
-    - "dgllife==0.3.0"
-  
-  # commands run after the environment is setup
-  run:
-    - "echo env is ready!"
-
-# predict.py defines how predictions are run on your model
-predict: "predict.py:Predictor"
diff --git a/main.py b/main.py
new file mode 100644
index 00000000..cd6c7599
--- /dev/null
+++ b/main.py
@@ -0,0 +1,110 @@
+import sys 
+import inference_VS_2
+import os
+import yaml
+import fastapi # a package we use to receive and return results via an API
+
+# define the API; GET / returns a static status message
+app = fastapi.FastAPI()  # the module is imported as `fastapi`, so a bare FastAPI() raised NameError
+@app.get("/")
+def root():
+    return "waiting for input"
+
+# below we define the key function used within the Docker container
+# TODO define API endpoint?
+# @app.get("/{input}")
+def predict(
+    protein: str,  # path to a PDB protein structure file
+    small_molecule_library: str,  # path to an SDF file containing >=2 small molecule ligands
+    ):
+    """Run inference for one protein against a small molecule library.
+
+    The two input files are moved to ``/src/tmp/dummy`` (renamed to
+    ``protein_<basename>`` and ``ligands_<basename>``), inference is run once per
+    entry of ``args.run_dirs``, and the result is looked up in ``/outputs/dummy``.
+
+    Args:
+        protein: path to a PDB protein structure file.
+        small_molecule_library: path to an SDF file containing >=2 small molecule ligands.
+
+    Returns:
+        The path of the alphabetically first file found in ``/outputs/dummy``.
+
+    Raises:
+        FileNotFoundError: if an input file is missing or no output file was written.
+    """
+    import shutil  # local import; used for shell-free file moves below
+
+    args = inference_VS_2.parse_arguments()[0]
+    args.inference_path = '/src/tmp' # the input files are moved to this directory for renaming and processing
+    args.output_directory = '/outputs'
+
+    # formatting input
+    protein = str(protein)
+    small_molecule_library = str(small_molecule_library)
+
+    # defining the destination file names from the argument path basenames
+    input_directory = args.inference_path + '/dummy'
+    protein_destination = input_directory + '/protein_' + os.path.basename(protein)
+    small_molecule_library_destination = input_directory + '/ligands_' + os.path.basename(small_molecule_library)
+
+    # moving the inputs without a shell: odd characters in paths are safe and a missing input raises immediately
+    os.makedirs(input_directory, exist_ok=True)
+    shutil.move(protein, protein_destination)
+    shutil.move(small_molecule_library, small_molecule_library_destination)
+
+    # adding missing args, only works for one run_dir
+    args.multi_ligand = True
+
+    # running the inference - this is the main function - lifted from __main__ in inference_VS_2.py
+    if args.config:
+        # NOTE(review): consider yaml.safe_load if the config only contains plain data
+        config_dict = yaml.load(args.config, Loader=yaml.FullLoader)
+        arg_dict = args.__dict__
+        for key, value in config_dict.items():
+            # only list values are merged; scalar config values are not copied (command line comparison was dropped)
+            if isinstance(value, list):
+                for v in value:
+                    arg_dict[key].append(v)
+        args.config = args.config.name
+    else:
+        config_dict = {}
+
+    for run_dir in args.run_dirs:
+        args.checkpoint = f'runs/{run_dir}/best_checkpoint.pt'
+        config_dict['checkpoint'] = args.checkpoint
+        # overwrite args with args from checkpoint except for the args that were contained in the config file
+        arg_dict = args.__dict__
+        with open(os.path.join(os.path.dirname(args.checkpoint), 'train_arguments.yaml'), 'r') as arg_file:
+            checkpoint_dict = yaml.load(arg_file, Loader=yaml.FullLoader)
+        for key, value in checkpoint_dict.items():
+            if key in config_dict:
+                continue
+            if isinstance(value, list):
+                for v in value:
+                    arg_dict[key].append(v)
+            else:
+                arg_dict[key] = value
+        args.model_parameters['noise_initial'] = 0
+        if args.inference_path is None:
+            inference_VS_2.inference(args)
+        elif args.multi_ligand:
+            print('Running Multi-Ligand')
+            inference_VS_2.multi_lig_inference(args)
+        else:
+            inference_VS_2.inference_from_files(args)
+
+    # locating the output file; sorting makes the choice deterministic if there are several
+    output_directory = args.output_directory + '/dummy'
+    output_names = sorted(os.listdir(output_directory))
+    if not output_names:
+        raise FileNotFoundError('no output file found in ' + output_directory)
+    output_path_sdf = output_directory + '/' + output_names[0]
+    print(output_path_sdf)
+
+    # the path is returned as .sdf; the former .txt renaming for the dataurl go package was dropped
+    return output_path_sdf
+
+if __name__ == "__main__":
+    # usage: python main.py <protein.pdb> <small_molecule_library.sdf>
+    print(predict(sys.argv[1], sys.argv[2]))
\ No newline at end of file
diff --git a/predict.py b/predict.py
deleted file mode 100644
index 53c8b95c..00000000
--- a/predict.py
+++ /dev/null
@@ -1,115 +0,0 @@
-# Prediction interface for Cog ⚙️
-# https://github.com/replicate/cog/blob/main/docs/python.md
-
-from cog import BasePredictor, Input, Path
-import sys
-# custom changes
-import inference_VS_2
-import os
-import yaml
-
-
-class Predictor(BasePredictor):
-    def predict(
-        self,
-        protein: Path = Input(description="a PDB protein structure file"),
-        small_molecule_library: Path = Input(description="an SDF file containing >=2 small molecule ligands"),
-    ) -> Path:    
-        # custom changes
-        args = inference_VS_2.parse_arguments()
-        args = args[0]
-        args.inference_path = '/src/tmp'
-        args.output_directory = '/src/out'
-
-        # formatting input
-        protein = str(protein)
-        small_molecule_library = str(small_molecule_library)     
-
-        # isolate the argument path basenames
-        protein_base = os.path.basename(protein)
-        small_molecule_library_base = os.path.basename(small_molecule_library)
-
-        # defining file name
-        protein_destination = args.inference_path + '/dummy/protein_' + protein_base
-        small_molecule_library_destination = args.inference_path + '/dummy/ligands_' + small_molecule_library_base
-
-        # moving files from the paths defined in the arguments to the input directory for processing
-        os.system('mkdir -p ' + args.inference_path + '/dummy')
-        os.system('mv ' + protein + ' ' + protein_destination)
-        os.system('mv ' + small_molecule_library + ' ' + small_molecule_library_destination)
-
-        # the dataurl go package does not like .sdf files, the input should be given in .txt - something to add to petri
-        small_molecule_library_sdf = os.path.splitext(small_molecule_library_destination)[0]+'.sdf'
-        print("converting library to sdf: " + small_molecule_library_sdf)
-        os.system('mv ' + small_molecule_library_destination + ' ' + small_molecule_library_sdf)
-
-        # adding missings args, only works for one run_dir
-        args.multi_ligand = True
-        # args.model_parameters['noise_initial'] = 0
-
-        # running the inference - this is the main function - lifted from __main__ in inference_VS_2.py
-        if args.config:
-            config_dict = yaml.load(args.config, Loader=yaml.FullLoader)
-            arg_dict = args.__dict__
-            for key, value in config_dict.items():
-                if isinstance(value, list):
-                    for v in value:
-                        arg_dict[key].append(v)
-                # dropping comparisson with CMD line arguments
-                #else:
-                #    if key in cmdline_args:
-                #        continue
-                #    arg_dict[key] = value
-            args.config = args.config.name
-        else:
-            config_dict = {}
-        
-
-        for run_dir in args.run_dirs:
-            args.checkpoint = f'runs/{run_dir}/best_checkpoint.pt'
-            config_dict['checkpoint'] = f'runs/{run_dir}/best_checkpoint.pt'
-            # overwrite args with args from checkpoint except for the args that were contained in the config file
-            arg_dict = args.__dict__
-            with open(os.path.join(os.path.dirname(args.checkpoint), 'train_arguments.yaml'), 'r') as arg_file:
-                checkpoint_dict = yaml.load(arg_file, Loader=yaml.FullLoader)
-            for key, value in checkpoint_dict.items():
-                if (key not in config_dict.keys()):
-                    if isinstance(value, list):
-                        for v in value:
-                            arg_dict[key].append(v)
-                    else:
-                        arg_dict[key] = value
-            args.model_parameters['noise_initial'] = 0
-            if args.inference_path == None:
-                inference_VS_2.inference(args)
-            elif args.multi_ligand == True:
-                print('Running Multi-Ligand')
-                #print(args)
-                inference_VS_2.multi_lig_inference(args)
-            else:
-                inference_VS_2.inference_from_files(args)
-
-        # moving the output file to the output directory
-        ouput_name = os.listdir(args.output_directory + '/dummy')[0]
-        output_path_sdf = args.output_directory + '/dummy/' + ouput_name
-        print(output_path_sdf)
-
-        # the dataurl go package does not like .sdf files, the input should be given in .txt - something to add to petri
-        output_path = os.path.splitext(output_path_sdf)[0]+'.txt'
-        print("converting library to txt: " + output_path)
-        os.system('mv ' + output_path_sdf + ' ' + output_path)
-
-        # output
-        output = Path(output_path)
-        return(output)
-
-if __name__ == '__main__':
-    p = Predictor()
-    p.predict(protein = '/src/test/test.pdb', small_molecule_library =  '/src/test/test.txt')
-    
-    # print(sys.argv[0])
-    # print(sys.argv[1])
-    # p.predict(protein = sys.argv[0], small_molecule_library =  sys.argv[1])
-    
-    # '/src/test/test.pdb'
-    # '/src/test/test.sdf'
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 77318c66..f90795bf 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,94 +1,21 @@
-absl-py 
-aiohttp 
-aiosignal 
-asttokens 
-async-timeout 
-asynctest==0.13.0
-attrs 
-biopandas 
-biopython 
-blinker==1.4
-brotlipy 
-cachetools 
-certifi==2022.6.15
-cffi 
-charset-normalizer 
-click 
-cloudpickle 
-colorama 
-cryptography 
-cycler 
-dgl-cu113
-dglgo 
-dgllife 
-executing 
-fonttools 
-frozenlist 
-future 
-google-auth 
-google-auth-oauthlib 
-greenlet 
-grpcio 
-hyperopt 
-icecream 
-idna 
-importlib-metadata 
-joblib 
-kiwisolver 
-Markdown 
-matplotlib 
-multidict 
-munkres==1.1.4
-networkx 
-numpy 
-oauthlib 
-packaging 
-pandas==1.3.5
-Pillow 
-ply==3.11
-POT 
-protobuf==3.19.4
-psutil 
-py4j 
-pyaml 
-pyasn1==0.4.8
-pyasn1-modules==0.2.7
-pycairo==1.21.0
-pycparser 
-Pygments 
-PyJWT 
-pyOpenSSL 
-pyparsing 
-PyQt5==5.15.7
-PyQt5-sip==12.11.0
-PySocks 
-python-dateutil 
-pytz 
-pyu2f 
-PyYAML 
-rdkit
-reportlab==3.5.68
-requests 
-requests-oauthlib 
-rsa 
-scikit-learn 
-scipy 
-sip 
-six 
-SQLAlchemy 
-tensorboard 
-tensorboard-data-server 
-tensorboard-plugin-wit 
-threadpoolctl 
-toml 
-torch==1.12.0
+torch==1.12.1
+torchvision
 torchaudio
-torchvision 
-tornado 
-tqdm 
-typing_extensions 
-unicodedata2 
-urllib3 
-Werkzeug 
-yarl 
-zipp 
+rdkit
+openbabel-wheel==3.1.1.5
+biopython
+rdkit
+biopandas
+pot
+dgl
+joblib
+pyaml
+icecream
+matplotlib
+tensorboard
+psutil
+dgllife
+
+fastapi>=0.68.0,<0.69.0
+pydantic>=1.8.0,<2.0.0
+uvicorn>=0.15.0,<0.16.0
\ No newline at end of file
diff --git a/requirements_docker.txt b/requirements_docker.txt
deleted file mode 100644
index c4edfe35..00000000
--- a/requirements_docker.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-torch==1.12.1
-torchvision
-torchaudio
-rdkit
-openbabel-wheel==3.1.1.5
-biopython
-rdkit
-biopandas
-pot
-dgl
-joblib
-pyaml
-icecream
-matplotlib
-tensorboard
-psutil
-dgllife
-
-# fastapi>=0.68.0,<0.69.0
-# pydantic>=1.8.0,<2.0.0
-# uvicorn>=0.15.0,<0.16.0
\ No newline at end of file
diff --git a/test/test.txt b/test/test.sdf
similarity index 100%
rename from test/test.txt
rename to test/test.sdf