Merge pull request IDEA-Research#108 from Andy1621/lkc/develop
add chatbot, wait to check ChatGPT
rentainhe authored Apr 13, 2023
2 parents 905fb4f + b6a0fa4 commit 22aee1d
Showing 7 changed files with 1,483 additions and 17 deletions.
23 changes: 22 additions & 1 deletion README.md
@@ -28,10 +28,13 @@ The **core idea** behind this project is to **combine the strengths of different

**BLIP + Grounded-SAM: Automatic Label System!**

Using BLIP to generate caption, extract tags and using Grounded-SAM for box and mask generating. Here's the demo output:
Using BLIP to generate a caption, ChatGPT to extract tags, and Grounded-SAM to generate boxes and masks. Here's the demo output:

![](./assets/automatic_label_output_demo3.jpg)
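
The automatic-label pipeline above chains three models in sequence. A hypothetical sketch of the data flow (the stub functions below stand in for BLIP, ChatGPT, Grounding DINO, and SAM; the real entry point is `automatic_label_demo.py`):

```python
def auto_label(image):
    """Sketch of the automatic-label data flow; stubs replace the real models."""
    caption = blip_caption(image)               # BLIP: image -> caption
    tags = chatgpt_extract_tags(caption)        # ChatGPT: caption -> object tags
    boxes = grounding_dino_detect(image, tags)  # Grounding DINO: tags -> boxes
    masks = sam_segment(image, boxes)           # SAM: boxes -> masks
    return caption, tags, boxes, masks

# Stubs so the sketch runs end to end; the real pipeline calls the actual models.
def blip_caption(img): return "a dog and a cat on a sofa"
def chatgpt_extract_tags(caption): return ["dog", "cat", "sofa"]
def grounding_dino_detect(img, tags): return [(t, (0, 0, 1, 1)) for t in tags]
def sam_segment(img, boxes): return [label for label, _ in boxes]
```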

**ChatBot**
![](./assets/chatbot_demo.png)


**Imagine Space**

@@ -306,6 +309,24 @@ python grounded_sam_whisper_inpainting_demo.py \

![](./assets/acoustics/gsam_whisper_inpainting_pipeline.png)

## :speech_balloon: Run ChatBot Demo
Following [Visual ChatGPT](https://github.com/microsoft/visual-chatgpt), we add a ChatBot to our project. It currently supports:
1. "Describe the image."
2. "Detect the dog (and the cat) in the image."
3. "Segment anything in the image."
4. "Segment the dog (and the cat) in the image."
5. "Help me label the image."
6. "Replace the dog with a cat in the image."

To use the ChatBot:
- Install Whisper if you want to use audio as input.
- Set the default model settings in the `Grounded_dino_sam_inpainting` tool.
- Run the demo:
```bash
export CUDA_VISIBLE_DEVICES=0
python chatbot.py
```
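
The ChatBot routes natural-language requests like those above to the underlying Grounded-SAM tools. A minimal, hypothetical sketch of that routing (the real `chatbot.py` follows Visual ChatGPT's LangChain-agent design; the tool names here are illustrative only):

```python
# Hypothetical keyword-based router for the supported prompt types.
# The actual chatbot delegates this decision to an LLM agent.

def route_request(prompt: str) -> str:
    """Map a user prompt to the name of the tool that should handle it."""
    p = prompt.lower()
    if "replace" in p:
        return "inpainting"             # e.g. "Replace the dog with a cat"
    if "label" in p:
        return "automatic_labeling"     # e.g. "Help me label the image"
    if "segment anything" in p:
        return "segment_everything"     # everything-mode SAM
    if "segment" in p:
        return "grounded_segmentation"  # text-prompted masks
    if "detect" in p:
        return "grounded_detection"     # text-prompted boxes
    return "captioning"                 # default: describe the image
```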


## :cupid: Acknowledgements
- [Segment Anything](https://github.com/facebookresearch/segment-anything)
Binary file added assets/chatbot_demo.png
6 changes: 2 additions & 4 deletions automatic_label_demo.py
@@ -264,9 +264,7 @@ def save_mask_data(output_dir, caption, mask_list, box_list, label_list):
)

# initialize SAM
sam = build_sam(checkpoint=sam_checkpoint)
sam.to(device=device)
predictor = SamPredictor(sam)
predictor = SamPredictor(build_sam(checkpoint=sam_checkpoint).to(device))
image = cv2.imread(image_path)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
predictor.set_image(image)
@@ -293,7 +291,7 @@ def save_mask_data(output_dir, caption, mask_list, box_list, label_list):
masks, _, _ = predictor.predict_torch(
point_coords = None,
point_labels = None,
boxes = transformed_boxes,
boxes = transformed_boxes.to(device),
multimask_output = False,
)
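
The recurring fix across these demos moves `transformed_boxes` onto the same device as SAM before calling `predict_torch`: in PyTorch, the operands of an operation must live on one device, so CUDA-resident SAM weights paired with CPU-resident boxes would raise a `RuntimeError`. A stand-in sketch of the pattern, using a dummy tensor-like class so it runs without PyTorch or a GPU:

```python
# Dummy stand-in for torch.Tensor illustrating why `.to(device)` is needed.
# In the real demos, `transformed_boxes.to(device)` aligns the boxes with
# the device (e.g. "cuda") that the SAM weights were moved to.

class FakeTensor:
    def __init__(self, data, device="cpu"):
        self.data, self.device = data, device

    def to(self, device):
        # Like torch.Tensor.to, returns a copy on the target device.
        return FakeTensor(self.data, device)

def predict(model_device, boxes):
    # Models reject inputs that live on a different device.
    if boxes.device != model_device:
        raise RuntimeError("expected boxes on " + model_device)
    return ["mask"] * len(boxes.data)

boxes = FakeTensor([[0, 0, 10, 10]])       # boxes start on the CPU
masks = predict("cuda", boxes.to("cuda"))  # .to() aligns the device first
```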

1,453 changes: 1,453 additions & 0 deletions chatbot.py


6 changes: 2 additions & 4 deletions grounded_sam_demo.py
@@ -177,9 +177,7 @@ def save_mask_data(output_dir, mask_list, box_list, label_list):
)

# initialize SAM
sam = build_sam(checkpoint=sam_checkpoint)
sam.to(device=device)
predictor = SamPredictor(sam)
predictor = SamPredictor(build_sam(checkpoint=sam_checkpoint).to(device))
image = cv2.imread(image_path)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
predictor.set_image(image)
@@ -197,7 +195,7 @@ def save_mask_data(output_dir, mask_list, box_list, label_list):
masks, _, _ = predictor.predict_torch(
point_coords = None,
point_labels = None,
boxes = transformed_boxes,
boxes = transformed_boxes.to(device),
multimask_output = False,
)

6 changes: 2 additions & 4 deletions grounded_sam_inpainting_demo.py
@@ -158,9 +158,7 @@ def show_box(box, ax, label):
)

# initialize SAM
sam = build_sam(checkpoint=sam_checkpoint)
sam.to(device=device)
predictor = SamPredictor(sam)
predictor = SamPredictor(build_sam(checkpoint=sam_checkpoint).to(device))
image = cv2.imread(image_path)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
predictor.set_image(image)
@@ -178,7 +176,7 @@ def show_box(box, ax, label):
masks, _, _ = predictor.predict_torch(
point_coords = None,
point_labels = None,
boxes = transformed_boxes,
boxes = transformed_boxes.to(device),
multimask_output = False,
)

6 changes: 2 additions & 4 deletions grounded_sam_whisper_inpainting_demo.py
@@ -227,9 +227,7 @@ def filter_prompts_with_chatgpt(caption, max_tokens=100, model="gpt-3.5-turbo"):
)

# initialize SAM
sam = build_sam(checkpoint=sam_checkpoint)
sam.to(device=device)
predictor = SamPredictor(sam)
predictor = SamPredictor(build_sam(checkpoint=sam_checkpoint).to(device))
image = cv2.imread(image_path)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
predictor.set_image(image)
@@ -247,7 +245,7 @@ def filter_prompts_with_chatgpt(caption, max_tokens=100, model="gpt-3.5-turbo"):
masks, _, _ = predictor.predict_torch(
point_coords = None,
point_labels = None,
boxes = transformed_boxes,
boxes = transformed_boxes.to(device),
multimask_output = False,
)

