From 32514d14526331a62f711cfc672d14b25d37841d Mon Sep 17 00:00:00 2001 From: HonzaCuhel Date: Fri, 31 Jan 2025 13:49:30 +0100 Subject: [PATCH 1/5] Add SAM2.1 annotator --- README.md | 3 +- datadreamer/dataset_annotation/__init__.py | 2 + .../dataset_annotation/sam2_annotator.py | 136 ++++++++++++++++++ .../generate_dataset_from_scratch.py | 12 +- datadreamer/utils/config.py | 4 +- .../generate_dataset_and_train_yolo.ipynb | 7 +- ..._segmentation_dataset_and_train_yolo.ipynb | 4 +- examples/helmet_detection.ipynb | 7 +- ..._by_step_dataset_generation_pipeline.ipynb | 7 + requirements.txt | 8 +- tests/core_tests/unittests/test_annotators.py | 55 +++++++ 11 files changed, 223 insertions(+), 22 deletions(-) create mode 100644 datadreamer/dataset_annotation/sam2_annotator.py diff --git a/README.md b/README.md index 8d88ebe..0a8cfec 100644 --- a/README.md +++ b/README.md @@ -181,7 +181,7 @@ datadreamer --config - `--num_objects_range`: Range of objects in a prompt. Default is 1 to 3. - `--prompt_generator`: Choose between `simple`, `lm` (Mistral-7B), `tiny` (tiny LM), and `qwen2` (Qwen2.5 LM). Default is `qwen2`. - `--image_generator`: Choose image generator, e.g., `sdxl`, `sdxl-turbo`, `sdxl-lightning` or `shuttle-3`. Default is `sdxl-turbo`. -- `--image_annotator`: Specify the image annotator, like `owlv2` for object detection or `aimv2` or `clip` for image classification or `owlv2-slimsam` for instance segmentation. Default is `owlv2`. +- `--image_annotator`: Specify the image annotator, like `owlv2` for object detection or `aimv2` or `clip` for image classification or `owlv2-slimsam` and `owlv2-sam2` for instance segmentation. Default is `owlv2`. - `--conf_threshold`: Confidence threshold for annotation. Default is `0.15`. - `--annotation_iou_threshold`: Intersection over Union (IoU) threshold for annotation. Default is `0.2`. - `--prompt_prefix`: Prefix to add to every image generation prompt. Default is `""`. 
@@ -221,6 +221,7 @@ datadreamer --config | | [CLIP](https://huggingface.co/openai/clip-vit-base-patch32) | Zero-shot-image-classification | | | [AIMv2](https://huggingface.co/apple/aimv2-large-patch14-224-lit) | Zero-shot-image-classification | | | [SlimSAM](https://huggingface.co/Zigeng/SlimSAM-uniform-50) | Zero-shot-instance-segmentation | +| | [SAM2.1](https://huggingface.co/facebook/sam2-hiera-tiny) | Zero-shot-instance-segmentation | diff --git a/datadreamer/dataset_annotation/__init__.py b/datadreamer/dataset_annotation/__init__.py index 3fe9f5d..5fcf62c 100644 --- a/datadreamer/dataset_annotation/__init__.py +++ b/datadreamer/dataset_annotation/__init__.py @@ -5,6 +5,7 @@ from .cls_annotator import ImgClassificationAnnotator from .image_annotator import BaseAnnotator, TaskList from .owlv2_annotator import OWLv2Annotator +from .sam2_annotator import SAM2Annotator from .slimsam_annotator import SlimSAMAnnotator __all__ = [ @@ -14,5 +15,6 @@ "OWLv2Annotator", "ImgClassificationAnnotator", "CLIPAnnotator", + "SAM2Annotator", "SlimSAMAnnotator", ] diff --git a/datadreamer/dataset_annotation/sam2_annotator.py b/datadreamer/dataset_annotation/sam2_annotator.py new file mode 100644 index 0000000..2c50805 --- /dev/null +++ b/datadreamer/dataset_annotation/sam2_annotator.py @@ -0,0 +1,136 @@ +from __future__ import annotations + +import logging +from typing import List + +import numpy as np +import PIL +import torch +from sam2.sam2_image_predictor import SAM2ImagePredictor + +from datadreamer.dataset_annotation.image_annotator import BaseAnnotator +from datadreamer.dataset_annotation.utils import mask_to_polygon + +logger = logging.getLogger(__name__) + + +class SAM2Annotator(BaseAnnotator): + """A class for image annotation using the SAM2.1 model, specializing in instance + segmentation. + + Attributes: + model (SAM2ImagePredictor): The SAM2.1 model for instance segmentation. + device (str): The device on which the model will run ('cuda' for GPU, 'cpu' for CPU). 
+ size (str): The size of the SAM model to use ('base' or 'large'). + + Methods: + _init_model(): Initializes the SAM2.1 model. + annotate_batch(images, boxes_batch, iou_threshold): Annotates the given images with instance segmentation polygons. + release(empty_cuda_cache): Releases resources and optionally empties the CUDA cache. + """ + + def __init__( + self, + seed: float = 42, + device: str = "cuda", + size: str = "base", + ) -> None: + """Initializes the SAM2Annotator with a specific seed, device and model size. + + Args: + seed (float): Seed for reproducibility. Defaults to 42. + device (str): The device to run the model on. Defaults to 'cuda'. + """ + super().__init__(seed) + self.size = size + self.device = device + self.model = self._init_model(device=device) + self.dtype = torch.bfloat16 if self.device == "cuda" else torch.float16 + + def _init_model(self, device: str) -> SAM2ImagePredictor: + """Initializes the SAM2.1 model for instance segmentation. + + Returns: + SAM2ImagePredictor: The initialized SAM2.1 model. + """ + logger.info(f"Initializing SAM2.1 {self.size} model...") + if self.size == "large": + return SAM2ImagePredictor.from_pretrained( + "facebook/sam2.1-hiera-base-plus", device=device + ) + return SAM2ImagePredictor.from_pretrained( + "facebook/sam2-hiera-tiny", device=device + ) + + def annotate_batch( + self, + images: List[PIL.Image.Image], + boxes_batch: List[np.ndarray], + iou_threshold: float = 0.2, + ) -> List[List[List[float]]]: + """Annotates images for the task of instance segmentation using the SAM2.1 + model. + + Args: + images: The images to be annotated. + boxes_batch: The bounding boxes of found objects. + iou_threshold (float, optional): Minimum mask score; masks scoring below it are returned as empty polygons. Defaults to 0.2. + + Returns: + List: A list containing the final segment masks represented as a polygon. 
+ """ + final_segments = [] + + image_batch = [np.array(img.convert("RGB")) for img in images] + bboxes_batch = [None if len(boxes) == 0 else boxes for boxes in boxes_batch] + + with torch.inference_mode(), torch.autocast(self.device, dtype=self.dtype): + self.model.set_image_batch(image_batch) + masks_batch, scores_batch, _ = self.model.predict_batch( + box_batch=bboxes_batch, + multimask_output=False, + ) + + n = len(images) + + for i in range(n): + boxes = boxes_batch[i].tolist() + if bboxes_batch[i] is None: + final_segments.append([]) + continue + + image_masks = [] + for j in range(len(boxes)): + mask, score = masks_batch[i][j], scores_batch[i][j] + if score < iou_threshold: + image_masks.append([]) + continue + mask = mask.astype(np.uint8) + polygon = mask_to_polygon(mask) + image_masks.append(polygon if len(polygon) != 0 else []) + + final_segments.append(image_masks) + + return final_segments + + def release(self, empty_cuda_cache: bool = False) -> None: + """Releases the model and optionally empties the CUDA cache. + + Args: + empty_cuda_cache (bool, optional): Whether to empty the CUDA cache. Defaults to False. 
+ """ + if empty_cuda_cache: + with torch.no_grad(): + torch.cuda.empty_cache() + + +if __name__ == "__main__": + import requests + from PIL import Image + + url = "https://ultralytics.com/images/bus.jpg" + im = Image.open(requests.get(url, stream=True).raw) + annotator = SAM2Annotator(device="cpu", size="base") + final_segments = annotator.annotate_batch([im], [np.array([[3, 229, 559, 650]])]) + print(len(final_segments), len(final_segments[0])) + print(final_segments[0][0][:5]) diff --git a/datadreamer/pipelines/generate_dataset_from_scratch.py b/datadreamer/pipelines/generate_dataset_from_scratch.py index 56ce04d..ef0fe55 100644 --- a/datadreamer/pipelines/generate_dataset_from_scratch.py +++ b/datadreamer/pipelines/generate_dataset_from_scratch.py @@ -20,6 +20,7 @@ AIMv2Annotator, CLIPAnnotator, OWLv2Annotator, + SAM2Annotator, SlimSAMAnnotator, ) from datadreamer.image_generation import ( @@ -61,8 +62,8 @@ det_annotators = {"owlv2": OWLv2Annotator} clf_annotators = {"clip": CLIPAnnotator, "aimv2": AIMv2Annotator} -inst_seg_annotators = {"owlv2-slimsam": SlimSAMAnnotator} -inst_seg_detectors = {"owlv2-slimsam": OWLv2Annotator} +inst_seg_annotators = {"owlv2-slimsam": SlimSAMAnnotator, "owlv2-sam2": SAM2Annotator} +inst_seg_detectors = {"owlv2-slimsam": OWLv2Annotator, "owlv2-sam2": OWLv2Annotator} setup_logging(use_rich=True) @@ -125,7 +126,7 @@ def parse_args(): parser.add_argument( "--image_annotator", type=str, - choices=["owlv2", "clip", "owlv2-slimsam", "aimv2"], + choices=["owlv2", "clip", "owlv2-slimsam", "aimv2", "owlv2-sam2"], help="Image annotator to use", ) @@ -668,9 +669,10 @@ def read_image_batch(image_batch, batch_num, batch_size): if args.task == "instance-segmentation": if k < len(masks_batch[j]): mask = masks_batch[j][k] - x_points, y_points = zip(*mask) + if len(mask) > 0: + x_points, y_points = zip(*mask) - ax.fill(x_points, y_points, label, alpha=0.5) + ax.fill(x_points, y_points, label, alpha=0.5) labels.append(label) x1, y1, x2, y2 = box 
diff --git a/datadreamer/utils/config.py b/datadreamer/utils/config.py index 1c90b21..2b73645 100644 --- a/datadreamer/utils/config.py +++ b/datadreamer/utils/config.py @@ -41,7 +41,9 @@ class Config(LuxonisConfig): # Profanity filter arguments disable_lm_filter: bool = False # Annotation arguments - image_annotator: Literal["owlv2", "aimv2", "clip", "owlv2-slimsam"] = "owlv2" + image_annotator: Literal[ + "owlv2", "aimv2", "clip", "owlv2-slimsam", "owlv2-sam2" + ] = "owlv2" conf_threshold: float = 0.15 annotation_iou_threshold: float = 0.2 use_tta: bool = False diff --git a/examples/generate_dataset_and_train_yolo.ipynb b/examples/generate_dataset_and_train_yolo.ipynb index ecfa5d7..a148fb6 100644 --- a/examples/generate_dataset_and_train_yolo.ipynb +++ b/examples/generate_dataset_and_train_yolo.ipynb @@ -5,7 +5,7 @@ "id": "11adc87f", "metadata": {}, "source": [ - "\n", + "\n", "\n", "# DataDreamer Tutorial: Generating a dataset for object detection, training a model, and deploying it to the OAK (optional)" ] @@ -85,7 +85,7 @@ "- `--num_objects_range`: Range of objects in a prompt. Default is 1 to 3.\n", "- `--prompt_generator`: Choose between `simple`, `lm` (Mistral-7B), `tiny` (tiny LM), and `qwen2` (Qwen2.5 LM). Default is `qwen2`.\n", "- `--image_generator`: Choose image generator, e.g., `sdxl`, `sdxl-turbo`, `sdxl-lightning` or `shuttle-3`. Default is `sdxl-turbo`.\n", - "- `--image_annotator`: Specify the image annotator, like `owlv2` for object detection or `aimv2` or `clip` for image classification or `owlv2-slimsam` for instance segmentation. Default is `owlv2`.\n", + "- `--image_annotator`: Specify the image annotator, like `owlv2` for object detection or `aimv2` or `clip` for image classification or `owlv2-slimsam` and `owlv2-sam2` for instance segmentation. Default is `owlv2`.\n", "- `--conf_threshold`: Confidence threshold for annotation. Default is `0.15`.\n", "- `--annotation_iou_threshold`: Intersection over Union (IoU) threshold for annotation. 
Default is `0.2`.\n", "- `--prompt_prefix`: Prefix to add to every image generation prompt. Default is `\"\"`.\n", @@ -104,8 +104,7 @@ "- `--batch_size_image`: Batch size for image generation. Default is `1`.\n", "- `--device`: Choose between `cuda` and `cpu`. Default is `cuda`.\n", "- `--seed`: Set a random seed for image and prompt generation. Default is `42`.\n", - "- `--config`: A path to an optional `.yaml` config file specifying the pipeline's arguments.\n", - "" + "- `--config`: A path to an optional `.yaml` config file specifying the pipeline's arguments.\n" ] }, { diff --git a/examples/generate_instance_segmentation_dataset_and_train_yolo.ipynb b/examples/generate_instance_segmentation_dataset_and_train_yolo.ipynb index 97a1b3d..8b1b7c2 100644 --- a/examples/generate_instance_segmentation_dataset_and_train_yolo.ipynb +++ b/examples/generate_instance_segmentation_dataset_and_train_yolo.ipynb @@ -7,7 +7,7 @@ "id": "8ce1517f-7258-406d-9139-9adadb1a1570" }, "source": [ - "\n", + "\n", "\n", "# DataDreamer Tutorial: Generating a dataset for instance segmentation, training a model, and deploying it to the OAK (optional)" ] @@ -99,7 +99,7 @@ "- `--num_objects_range`: Range of objects in a prompt. Default is 1 to 3.\n", "- `--prompt_generator`: Choose between `simple`, `lm` (Mistral-7B), `tiny` (tiny LM), and `qwen2` (Qwen2.5 LM). Default is `qwen2`.\n", "- `--image_generator`: Choose image generator, e.g., `sdxl`, `sdxl-turbo`, `sdxl-lightning` or `shuttle-3`. Default is `sdxl-turbo`.\n", - "- `--image_annotator`: Specify the image annotator, like `owlv2` for object detection or `aimv2` or `clip` for image classification or `owlv2-slimsam` for instance segmentation. Default is `owlv2`.\n", + "- `--image_annotator`: Specify the image annotator, like `owlv2` for object detection or `aimv2` or `clip` for image classification or `owlv2-slimsam` and `owlv2-sam2` for instance segmentation. 
Default is `owlv2`.\n", "- `--conf_threshold`: Confidence threshold for annotation. Default is `0.15`.\n", "- `--annotation_iou_threshold`: Intersection over Union (IoU) threshold for annotation. Default is `0.2`.\n", "- `--prompt_prefix`: Prefix to add to every image generation prompt. Default is `\"\"`.\n", diff --git a/examples/helmet_detection.ipynb b/examples/helmet_detection.ipynb index 9de96b0..4da0ec0 100644 --- a/examples/helmet_detection.ipynb +++ b/examples/helmet_detection.ipynb @@ -1,15 +1,10 @@ { "cells": [ { - "attachments": { - "image.png": { - "image/png": "" - } - }, "cell_type": "markdown", "metadata": {}, "source": [ - "![image.png](attachment:image.png)" + "" ] }, { diff --git a/examples/step_by_step_dataset_generation_pipeline.ipynb b/examples/step_by_step_dataset_generation_pipeline.ipynb index b3466a1..d04ee63 100644 --- a/examples/step_by_step_dataset_generation_pipeline.ipynb +++ b/examples/step_by_step_dataset_generation_pipeline.ipynb @@ -1,5 +1,12 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/requirements.txt b/requirements.txt index cdd8a50..7d2c8e1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ -torch>=2.0.0 -torchvision>=0.16.0 +torch>=2.5.1 +torchvision>=0.20.1 transformers>=4.45.2 diffusers>=0.31.0 compel>=2.0.0 @@ -16,4 +16,6 @@ luxonis-ml[all]>=0.5.0 python-box>=7.1.1 gcsfs>=2023.1.0 sentencepiece>=0.2.0 -optimum-quanto>=0.2.6 \ No newline at end of file +optimum-quanto>=0.2.6 +huggingface_hub>=0.28.1 +SAM-2 @ git+https://github.com/facebookresearch/sam2.git \ No newline at end of file diff --git a/tests/core_tests/unittests/test_annotators.py b/tests/core_tests/unittests/test_annotators.py index eb5c986..0798cff 100644 --- a/tests/core_tests/unittests/test_annotators.py +++ b/tests/core_tests/unittests/test_annotators.py @@ -10,6 +10,7 @@ from datadreamer.dataset_annotation.aimv2_annotator import 
AIMv2Annotator from datadreamer.dataset_annotation.clip_annotator import CLIPAnnotator from datadreamer.dataset_annotation.owlv2_annotator import OWLv2Annotator +from datadreamer.dataset_annotation.sam2_annotator import SAM2Annotator from datadreamer.dataset_annotation.slimsam_annotator import SlimSAMAnnotator # Get the total disk space in GB @@ -183,3 +184,57 @@ def test_cuda_slimsam_large_annotator(): ) def test_cpu_slimsam_large_annotator(): _check_slimsam_annotator("cpu", size="large") + + +def _check_sam2_annotator(device: str, size: str = "base"): + url = "https://ultralytics.com/images/bus.jpg" + im = Image.open(requests.get(url, stream=True).raw) + annotator = SAM2Annotator(device=device, size=size) + masks = annotator.annotate_batch([im], [np.array([[3, 229, 559, 650]])]) + w, h = im.width, im.height + # Check that the masks are lists + assert isinstance(masks, list) and len(masks) == 1 + # Check that the masks are [B, O, N, 2], where + # - B = batch size + # - O = number of objects + # - N = number of points of the mask segment polygon (at least 3 to be polygon) + assert isinstance(masks[0], list) and len(masks[0]) == 1 + assert isinstance(masks[0][0], list) and len(masks[0][0]) >= 3 + for point in masks[0][0]: + # Check that it is a 2D point + assert len(point) == 2 + assert 0 <= point[0] <= w and 0 <= point[1] <= h + + annotator.release(empty_cuda_cache=True if device != "cpu" else False) + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_disk_space < 16, + reason="Test requires GPU and 16GB of HDD", +) +def test_cuda_sam2_base_annotator(): + _check_sam2_annotator("cuda") + + +@pytest.mark.skipif( + total_disk_space < 16, + reason="Test requires at least 16GB of HDD", +) +def test_cpu_sam2_base_annotator(): + _check_sam2_annotator("cpu") + + +@pytest.mark.skipif( + not torch.cuda.is_available() or total_disk_space < 16, + reason="Test requires GPU and 16GB of HDD", +) +def test_cuda_sam2_large_annotator(): + _check_sam2_annotator("cuda", 
size="large") + + +@pytest.mark.skipif( + total_disk_space < 16, + reason="Test requires at least 16GB of HDD", +) +def test_cpu_sam2_large_annotator(): + _check_sam2_annotator("cpu", size="large") From bdffe962ab4ed92b41dc5445cc272e00c2cf3c81 Mon Sep 17 00:00:00 2001 From: HonzaCuhel Date: Fri, 31 Jan 2025 15:58:53 +0100 Subject: [PATCH 2/5] Squeeze the mask if it's 3D --- datadreamer/dataset_annotation/sam2_annotator.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/datadreamer/dataset_annotation/sam2_annotator.py b/datadreamer/dataset_annotation/sam2_annotator.py index 2c50805..ceb55e6 100644 --- a/datadreamer/dataset_annotation/sam2_annotator.py +++ b/datadreamer/dataset_annotation/sam2_annotator.py @@ -101,11 +101,12 @@ def annotate_batch( image_masks = [] for j in range(len(boxes)): - mask, score = masks_batch[i][j], scores_batch[i][j] + mask, score = masks_batch[i][j].astype(np.uint8), scores_batch[i][j] if score < iou_threshold: image_masks.append([]) continue - mask = mask.astype(np.uint8) + if len(mask.shape) == 3: + mask = mask.squeeze(0) polygon = mask_to_polygon(mask) image_masks.append(polygon if len(polygon) != 0 else []) From fe83647bbadd4084f24f2066ad1abef55f039b75 Mon Sep 17 00:00:00 2001 From: HonzaCuhel Date: Sat, 1 Feb 2025 15:14:10 +0100 Subject: [PATCH 3/5] Update dependencies --- requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 7d2c8e1..59b9e3a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,14 +1,14 @@ torch>=2.5.1 torchvision>=0.20.1 -transformers>=4.45.2 -diffusers>=0.31.0 +transformers>=4.48.2 +diffusers>=0.32.2 compel>=2.0.0 tqdm>=4.0.0 Pillow>=9.0.0 numpy>=1.22.0 matplotlib>=3.6.0 opencv-python>=4.7.0 -accelerate>=0.25.0 +accelerate>=1.3.0 scipy>=1.10.0 bitsandbytes>=0.42.0 nltk>=3.8.1 From 37e2c46663913ead00bab2b9d72f89d65fafd7b7 Mon Sep 17 00:00:00 2001 From: HonzaCuhel Date: Mon, 3 Feb 2025 15:43:29 +0100 Subject: 
[PATCH 4/5] Update model sizes --- README.md | 2 +- datadreamer/dataset_annotation/sam2_annotator.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 0a8cfec..3f3786b 100644 --- a/README.md +++ b/README.md @@ -221,7 +221,7 @@ datadreamer --config | | [CLIP](https://huggingface.co/openai/clip-vit-base-patch32) | Zero-shot-image-classification | | | [AIMv2](https://huggingface.co/apple/aimv2-large-patch14-224-lit) | Zero-shot-image-classification | | | [SlimSAM](https://huggingface.co/Zigeng/SlimSAM-uniform-50) | Zero-shot-instance-segmentation | -| | [SAM2.1](https://huggingface.co/facebook/sam2-hiera-tiny) | Zero-shot-instance-segmentation | +| | [SAM2.1](https://huggingface.co/facebook/sam2.1-hiera-large) | Zero-shot-instance-segmentation | diff --git a/datadreamer/dataset_annotation/sam2_annotator.py b/datadreamer/dataset_annotation/sam2_annotator.py index ceb55e6..ad16d6c 100644 --- a/datadreamer/dataset_annotation/sam2_annotator.py +++ b/datadreamer/dataset_annotation/sam2_annotator.py @@ -56,10 +56,10 @@ def _init_model(self, device: str) -> SAM2ImagePredictor: logger.info(f"Initializing SAM2.1 {self.size} model...") if self.size == "large": return SAM2ImagePredictor.from_pretrained( - "facebook/sam2.1-hiera-base-plus", device=device + "facebook/sam2.1-hiera-large", device=device ) return SAM2ImagePredictor.from_pretrained( - "facebook/sam2-hiera-tiny", device=device + "facebook/sam2.1-hiera-base-plus", device=device ) def annotate_batch( @@ -131,7 +131,7 @@ def release(self, empty_cuda_cache: bool = False) -> None: url = "https://ultralytics.com/images/bus.jpg" im = Image.open(requests.get(url, stream=True).raw) - annotator = SAM2Annotator(device="cpu", size="base") + annotator = SAM2Annotator(device="cpu", size="large") final_segments = annotator.annotate_batch([im], [np.array([[3, 229, 559, 650]])]) print(len(final_segments), len(final_segments[0])) print(final_segments[0][0][:5]) From 
f8ef89c176b4af11001ca71797341c63091a425e Mon Sep 17 00:00:00 2001 From: HonzaCuhel Date: Tue, 4 Feb 2025 08:26:57 +0100 Subject: [PATCH 5/5] Fix dependencies --- requirements.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/requirements.txt b/requirements.txt index 59b9e3a..bb3ce9a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,14 +1,14 @@ -torch>=2.5.1 -torchvision>=0.20.1 -transformers>=4.48.2 -diffusers>=0.32.2 +torch>=2.0.0,<=2.5.1 +torchvision>=0.16.0 +transformers>=4.45.2 +diffusers>=0.31.0 compel>=2.0.0 tqdm>=4.0.0 Pillow>=9.0.0 numpy>=1.22.0 matplotlib>=3.6.0 opencv-python>=4.7.0 -accelerate>=1.3.0 +accelerate>=0.25.0 scipy>=1.10.0 bitsandbytes>=0.42.0 nltk>=3.8.1