# model id specified in dockerfile #23

Open: wants to merge 4 commits into `main`.
## Dockerfile (4 additions, 0 deletions)

```diff
@@ -11,6 +11,10 @@ RUN pip3 install --upgrade pip
 ADD requirements.txt requirements.txt
 RUN pip3 install -r requirements.txt
 
+# In this example, we can define the Hugging Face model as an ENV variable
+# and from here pass it to download.py & app.py
+ENV HF_MODEL_NAME bert-base-uncased
+
 # We add the banana boilerplate here
 ADD server.py .
```
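`ENV` values are visible both to later build steps (so a `RUN python3 download.py` step further down this Dockerfile, assuming one exists in the collapsed portion, bakes the matching weights into the image) and to the running container, where app.py reads the same variable. Outside Docker nothing sets the variable and `os.getenv` returns `None`, so a fallback default is a cheap safeguard. A minimal sketch, not part of this diff:

```python
import os

# Hypothetical hardening (not in this PR): fall back to the checkpoint the
# Dockerfile defaults to when the variable is missing, e.g. when the
# scripts are run outside the container without an `export`.
hf_model_name = os.getenv("HF_MODEL_NAME", "bert-base-uncased")
```

Overriding at run time with `docker run -e HF_MODEL_NAME=...` also works, but the new model's weights are then downloaded at startup instead of being baked into the image at build time.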
## README.md (33 additions, 0 deletions)

````diff
@@ -17,4 +17,37 @@ Generalize this framework to [deploy anything on Banana](https://docs.banana.dev
 
 <br>
 
+# Local testing
+
+## With Docker
+
+To test the Serverless Framework locally with Docker, you need to build the Docker image and then run it.
+From the root of this repository, run:
+```
+docker build . -t serverless-template
+```
+Then you can run the container. Here we also publish the port, so the server is reachable outside the
+container, and enable GPU acceleration.
+```
+docker run -p 8000:8000 --gpus=all serverless-template
+```
+
+## Without Docker
+
+Testing your code without Docker is straightforward. Remember to pass the Hugging Face model name as
+an environment variable. In this case:
+```
+export HF_MODEL_NAME=bert-base-uncased
+```
+Make sure you have the required dependencies:
+```
+pip3 install -r requirements.txt
+```
+Then simply run server.py:
+```
+python3 server.py
+```
+
+<br>
+
 ## Use Banana for scale.
````
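Once the server is up, by either route above, it helps to smoke-test it. This diff does not show server.py, so the request shape below is an assumption: the template convention of a POST to `/` with a JSON body carrying a `prompt` field. A fill-mask prompt must also contain the model's mask token, `[MASK]` for bert-base-uncased.

```python
import requests

# Hypothetical smoke test; assumes server.py accepts POST / with a JSON
# body like {"prompt": ...} and answers with fill-mask predictions.
payload = {"prompt": "The capital of France is [MASK]."}
resp = requests.post("http://localhost:8000/", json=payload)
resp.raise_for_status()
print(resp.json())
```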
## app.py (5 additions, 1 deletion)

```diff
@@ -1,13 +1,17 @@
 from transformers import pipeline
 import torch
+import os
 
 # Init is run on server startup
 # Load your model to GPU as a global variable here using the variable name "model"
 def init():
     global model
 
+    # In this example, we get the model name as an ENV variable defined in the Dockerfile
+    hf_model_name = os.getenv("HF_MODEL_NAME")
+
     device = 0 if torch.cuda.is_available() else -1
-    model = pipeline('fill-mask', model='bert-base-uncased', device=device)
+    model = pipeline('fill-mask', model=hf_model_name, device=device)
 
 # Inference is run for every server call
 # Reference your preloaded global model variable here.
```
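The inference half of app.py is collapsed here, and the PR leaves it untouched: the change only affects which checkpoint is loaded, not how the global `model` is called. For orientation, a sketch of the shape such an inference function usually has in this template; the names and the return format are assumptions, not taken from the diff:

```python
# Assumed shape of the collapsed inference(); not shown in this diff.
def inference(model_inputs: dict) -> dict:
    global model

    prompt = model_inputs.get("prompt")
    if prompt is None:
        return {"message": "No prompt provided"}

    # A fill-mask pipeline requires the tokenizer's mask token in the
    # input, e.g. "[MASK]" for bert-base-uncased.
    return {"result": model(prompt)}
```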
## download.py (6 additions, 1 deletion)

```diff
@@ -4,10 +4,15 @@
 # In this example: A Hugging Face BERT model
 
 from transformers import pipeline
+import os
 
 def download_model():
+
+    # In this example, we get the model name as an ENV variable defined in the Dockerfile
+    hf_model_name = os.getenv("HF_MODEL_NAME")
+
     # do a dry run of loading the Hugging Face model, which will download weights
-    pipeline('fill-mask', model='bert-base-uncased')
+    pipeline('fill-mask', model=hf_model_name)
 
 if __name__ == "__main__":
     download_model()
```
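One consequence of reading the name from the environment: if `HF_MODEL_NAME` is unset, `os.getenv` returns `None`, and `pipeline('fill-mask', model=None)` quietly falls back to the task's default checkpoint, so the image would bake in weights nobody asked for. A fail-fast variant, sketched here rather than taken from the PR:

```python
import os

from transformers import pipeline

def download_model():
    hf_model_name = os.getenv("HF_MODEL_NAME")
    # Fail loudly instead of letting pipeline() fall back to a default
    # checkpoint when the variable is missing (hypothetical hardening).
    if not hf_model_name:
        raise RuntimeError("HF_MODEL_NAME is not set; define it in the Dockerfile or export it")
    pipeline('fill-mask', model=hf_model_name)

if __name__ == "__main__":
    download_model()
```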