From e49e50e26b2cf578055912cb8193fe25a0788148 Mon Sep 17 00:00:00 2001
From: cmgzn <zdongs@outlook.com>
Date: Fri, 27 Sep 2024 17:15:24 +0800
Subject: [PATCH] redactor: replace custom request with third-party API package
 (webuiapi) for SD-model invocation feat: add stablediffusion model  services

---
 setup.py                                      |   3 +
 .../models/stablediffusion_model.py           | 215 +++++++-----------
 src/agentscope/service/__init__.py            |   2 +
 .../stablediffusion_services.py               | 122 ++++++++++
 4 files changed, 206 insertions(+), 136 deletions(-)
 create mode 100644 src/agentscope/service/multi_modality/stablediffusion_services.py

diff --git a/setup.py b/setup.py
index cd577d5b8..e29fdd8f9 100644
--- a/setup.py
+++ b/setup.py
@@ -90,6 +90,7 @@
 extra_litellm_requires = ["litellm"]
 extra_zhipuai_requires = ["zhipuai"]
 extra_ollama_requires = ["ollama>=0.1.7"]
+extra_sd_webuiapi_requires = ["webuiapi"]
 
 # Full requires
 extra_full_requires = (
@@ -102,6 +103,7 @@
     + extra_litellm_requires
     + extra_zhipuai_requires
     + extra_ollama_requires
+    + extra_sd_webuiapi_requires
 )
 
 # For online workstation
@@ -140,6 +142,7 @@
         "litellm": extra_litellm_requires,
         "zhipuai": extra_zhipuai_requires,
         "gemini": extra_gemini_requires,
+        "stablediffusion": extra_sd_webuiapi_requires,
         # For service functions
         "service": extra_service_requires,
         # For distribution mode
diff --git a/src/agentscope/models/stablediffusion_model.py b/src/agentscope/models/stablediffusion_model.py
index c8f2d9548..2ab32d467 100644
--- a/src/agentscope/models/stablediffusion_model.py
+++ b/src/agentscope/models/stablediffusion_model.py
@@ -1,17 +1,16 @@
 # -*- coding: utf-8 -*-
 """Model wrapper for stable diffusion models."""
 from abc import ABC
-import base64
-import json
-import time
-from typing import Any, Optional, Union, Sequence
+from typing import Any, Union, Sequence
 
-import requests
 from loguru import logger
 
+try:
+    import webuiapi
+except ImportError:
+    webuiapi = None
+
 from . import ModelWrapperBase, ModelResponse
-from ..constants import _DEFAULT_MAX_RETRIES
-from ..constants import _DEFAULT_RETRY_INTERVAL
 from ..message import Msg
 from ..manager import FileManager
 from ..utils.common import _convert_to_str
@@ -23,9 +22,10 @@ class StableDiffusionWrapperBase(ModelWrapperBase, ABC):
     To use SD-webui API, please
     1. First download stable-diffusion-webui from
     https://github.com/AUTOMATIC1111/stable-diffusion-webui and
-    install it with 'webui-user.bat'
+    install it
     2. Move your checkpoint to 'models/Stable-diffusion' folder
-    3. Start launch.py with the '--api' parameter to start the server
+    3. Start launch.py with the '--api --port=7862' parameter
+    4. Install the 'webuiapi' package by 'pip install webuiapi'
     After that, you can use the SD-webui API and
     query the available parameters on the http://localhost:7862/docs page
     """
@@ -35,15 +35,10 @@ class StableDiffusionWrapperBase(ModelWrapperBase, ABC):
     def __init__(
         self,
         config_name: str,
-        host: str = "127.0.0.1:7862",
-        base_url: Optional[Union[str, None]] = None,
-        use_https: bool = False,
         generate_args: dict = None,
-        headers: dict = None,
         options: dict = None,
-        timeout: int = 30,
-        max_retries: int = _DEFAULT_MAX_RETRIES,
-        retry_interval: int = _DEFAULT_RETRY_INTERVAL,
+        host: str = "127.0.0.1",
+        port: int = 7862,
         **kwargs: Any,
     ) -> None:
         """
@@ -52,46 +47,30 @@ def __init__(
         Args:
             config_name (`str`):
                 The name of the model config.
-            host (`str`, default `"127.0.0.1:7862"`):
-                The host port of the stable-diffusion webui server.
-            base_url (`str`, default `None`):
-                Base URL for the stable-diffusion webui services.
-                Generated from host and use_https if not provided.
-            use_https (`bool`, default `False`):
-                Whether to generate the base URL with HTTPS protocol or HTTP.
             generate_args (`dict`, default `None`):
                 The extra keyword arguments used in SD api generation,
                 e.g. `{"steps": 50}`.
-            headers (`dict`, default `None`):
-                HTTP request headers.
             options (`dict`, default `None`):
-                The keyword arguments to change the webui settings
+                The keyword arguments to change the sd-webui settings
                 such as model or CLIP skip, this changes will persist.
                 e.g. `{"sd_model_checkpoint": "Anything-V3.0-pruned"}`.
+            host (`str`, default `"127.0.0.1"`):
+                The host of the stable-diffusion webui server.
+            port (`int`, default `7862`):
+                The port of the stable-diffusion webui server.
         """
-        # Construct base_url based on HTTPS usage if not provided
-        if base_url is None:
-            if use_https:
-                base_url = f"https://{host}"
-            else:
-                base_url = f"http://{host}"
-
-        self.base_url = base_url
-        self.options_url = f"{base_url}/sdapi/v1/options"
+        # Initialize the SD-webui API
+        self.api = webuiapi.WebUIApi(host=host, port=port, **kwargs)
         self.generate_args = generate_args or {}
 
-        # Initialize the HTTP session and update the request headers
-        self.session = requests.Session()
-        if headers:
-            self.session.headers.update(headers)
-
         # Set options if provided
         if options:
-            self._set_options(options)
+            self.api.set_options(options)
+            logger.info(f"Set webui options: {options}")
 
         # Get the default model name from the web-options
         model_name = (
-            self._get_options()["sd_model_checkpoint"].split("[")[0].strip()
+            self.api.get_options()["sd_model_checkpoint"].split("[")[0].strip()
         )
         # Update the model name
         if self.generate_args.get("override_settings"):
@@ -102,116 +81,29 @@ def __init__(
 
         super().__init__(config_name=config_name, model_name=model_name)
 
-        self.timeout = timeout
-        self.max_retries = max_retries
-        self.retry_interval = retry_interval
-
-    @property
-    def url(self) -> str:
-        """SD-webui API endpoint URL"""
-        raise NotImplementedError()
-
-    def _get_options(self) -> dict:
-        response = self.session.get(url=self.options_url)
-        if response.status_code != 200:
-            logger.error(f"Failed to get options with {response.json()}")
-            raise RuntimeError(f"Failed to get options with {response.json()}")
-        return response.json()
-
-    def _set_options(self, options: dict) -> None:
-        response = self.session.post(url=self.options_url, json=options)
-        if response.status_code != 200:
-            logger.error(json.dumps(options, indent=4))
-            raise RuntimeError(f"Failed to set options with {response.json()}")
-        logger.info("Optionsset successfully")
-
-    def _invoke_model(self, payload: dict) -> dict:
-        """Invoke SD webui API and record the invocation if needed"""
-        # step1: prepare post requests
-        for i in range(1, self.max_retries + 1):
-            response = self.session.post(url=self.url, json=payload)
-
-            if response.status_code == requests.codes.ok:
-                break
-
-            if i < self.max_retries:
-                logger.warning(
-                    f"Failed to call the model with "
-                    f"requests.codes == {response.status_code}, retry "
-                    f"{i + 1}/{self.max_retries} times",
-                )
-                time.sleep(i * self.retry_interval)
-
-        # step2: record model invocation
-        # record the model api invocation, which will be skipped if
-        # `FileManager.save_api_invocation` is `False`
-        self._save_model_invocation(
-            arguments=payload,
-            response=response.json(),
-        )
-
-        # step3: return the response json
-        if response.status_code == requests.codes.ok:
-            return response.json()
-        else:
-            logger.error(
-                json.dumps({"url": self.url, "json": payload}, indent=4),
-            )
-            raise RuntimeError(
-                f"Failed to call the model with {response.json()}",
-            )
-
-    def _parse_response(self, response: dict) -> ModelResponse:
-        """Parse the response json data into ModelResponse"""
-        return ModelResponse(raw=response)
-
 
 class StableDiffusionImageSynthesisWrapper(StableDiffusionWrapperBase):
     """Stable Diffusion Text-to-Image (txt2img) API Wrapper"""
 
     model_type: str = "sd_txt2img"
 
-    @property
-    def url(self) -> str:
-        return f"{self.base_url}/sdapi/v1/txt2img"
-
-    def _parse_response(self, response: dict) -> ModelResponse:
-        session_parameters = response["parameters"]
-        size = f"{session_parameters['width']}*{session_parameters['height']}"
-        image_count = (
-            session_parameters["batch_size"] * session_parameters["n_iter"]
-        )
-
-        self.monitor.update_image_tokens(
-            model_name=self.model_name,
-            image_count=image_count,
-            resolution=size,
-        )
-
-        # Get image base64code as a list
-        images = response["images"]
-        b64_images = [base64.b64decode(image) for image in images]
-
-        file_manager = FileManager.get_instance()
-        # Return local url
-        image_urls = [file_manager.save_image(_) for _ in b64_images]
-        text = "Image saved to " + "\n".join(image_urls)
-        return ModelResponse(text=text, image_urls=image_urls, raw=response)
-
     def __call__(
         self,
         prompt: str,
+        save_local: bool = True,
         **kwargs: Any,
     ) -> ModelResponse:
         """
         Args:
             prompt (`str`):
                 The prompt string to generate images from.
+            save_local (`bool`, default `False`):
+                Whether to save the generated images locally.
             **kwargs (`Any`):
                 The keyword arguments to SD-webui txt2img API, e.g.
                 `n_iter`, `steps`, `seed`, `width`, etc. Please refer to
                 https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/API
-                or http://localhost:7860/docs
+                or http://localhost:7862/docs
                 for more detailed arguments.
         Returns:
             `ModelResponse`:
@@ -226,10 +118,61 @@ def __call__(
         }
 
         # step2: forward to generate response
-        response = self._invoke_model(payload)
+        response = self.api.txt2img(**payload)
+
+        # step3: save model invocation and update monitor
+        self._save_model_invocation_and_update_monitor(
+            payload=payload,
+            response=response.json,
+        )
+
+        # step4: parse the response
+        PIL_images = response.images
+
+        file_manager = FileManager.get_instance()
+        if save_local:
+            # Save images
+            image_urls = [file_manager.save_image(_) for _ in PIL_images]
+            text = "Image saved to " + "\n".join(image_urls)
+        else:
+            image_urls = PIL_images
+            text = None
+
+        return ModelResponse(
+            text=text,
+            image_urls=image_urls,
+            raw=response.json,
+        )
+
+    def _save_model_invocation_and_update_monitor(
+        self,
+        payload: dict,
+        response: dict,
+    ) -> None:
+        """Save the model invocation and update the monitor accordingly.
+
+        Args:
+            kwargs (`dict`):
+                The keyword arguments to the DashScope chat API.
+            response (`dict`):
+                The response object returned by the DashScope chat API.
+        """
+        self._save_model_invocation(
+            arguments=payload,
+            response=response,
+        )
+
+        session_parameters = response["parameters"]
+        size = f"{session_parameters['width']}*{session_parameters['height']}"
+        image_count = (
+            session_parameters["batch_size"] * session_parameters["n_iter"]
+        )
 
-        # step3: parse the response
-        return self._parse_response(response)
+        self.monitor.update_image_tokens(
+            model_name=self.model_name,
+            image_count=image_count,
+            resolution=size,
+        )
 
     def format(self, *args: Union[Msg, Sequence[Msg]]) -> str:
         # This is a temporary implementation to focus on the prompt
diff --git a/src/agentscope/service/__init__.py b/src/agentscope/service/__init__.py
index 7d33e6501..20c1af051 100644
--- a/src/agentscope/service/__init__.py
+++ b/src/agentscope/service/__init__.py
@@ -45,6 +45,7 @@
     openai_edit_image,
     openai_create_image_variation,
 )
+from .multi_modality.stablediffusion_services import sd_text_to_image
 
 from .service_response import ServiceResponse
 from .service_toolkit import ServiceToolkit
@@ -117,6 +118,7 @@ def get_help() -> None:
     "openai_image_to_text",
     "openai_edit_image",
     "openai_create_image_variation",
+    "sd_text_to_image",
     "tripadvisor_search",
     "tripadvisor_search_location_photos",
     "tripadvisor_search_location_details",
diff --git a/src/agentscope/service/multi_modality/stablediffusion_services.py b/src/agentscope/service/multi_modality/stablediffusion_services.py
new file mode 100644
index 000000000..4547aa115
--- /dev/null
+++ b/src/agentscope/service/multi_modality/stablediffusion_services.py
@@ -0,0 +1,122 @@
+# -*- coding: utf-8 -*-
+"""Use StableDiffusion-webui API to generate images
+"""
+import os
+from typing import Optional
+
+from ...models import StableDiffusionImageSynthesisWrapper
+
+from ...manager import FileManager
+from ..service_response import (
+    ServiceResponse,
+    ServiceExecStatus,
+)
+from ...utils.common import (
+    _get_timestamp,
+    _generate_random_code,
+)
+from ...constants import _DEFAULT_IMAGE_NAME
+
+
+def sd_text_to_image(
+    prompt: str,
+    n_iter: int = 1,
+    width: int = 1024,
+    height: int = 1024,
+    options: dict = None,
+    baseurl: str = None,
+    save_dir: Optional[str] = None,
+) -> ServiceResponse:
+    """Generate image(s) based on the given prompt, and return image url(s).
+
+    Args:
+        prompt (`str`):
+            The text prompt to generate image.
+        n (`int`, defaults to `1`):
+            The number of images to generate.
+        width (`int`, defaults to `1024`):
+            Width of the image.
+        height (`int`, defaults to `1024`):
+            Height of the image.
+        options (`dict`, defaults to `None`):
+            The options to override the sd-webui default settings.
+            If not specified, will use the default settings.
+        baseurl (`str`, defaults to `None`):
+            The base url of the sd-webui.
+        save_dir (`Optional[str]`, defaults to 'None'):
+            The directory to save the generated images. If not specified,
+            will return the web urls.
+
+    Returns:
+        ServiceResponse:
+        A dictionary with two variables: `status` and`content`.
+        If `status` is ServiceExecStatus.SUCCESS,
+        the `content` is a dict with key 'fig_paths" and
+        value is a list of the paths to the generated images.
+
+    Example:
+
+        .. code-block:: python
+
+            prompt = "A beautiful sunset in the mountains"
+            print(sd_text_to_image(prompt, 2))
+
+    > {
+    >     'status': 'SUCCESS',
+    >     'content': {'image_urls': ['IMAGE_URL1', 'IMAGE_URL2']}
+    > }
+
+    """
+    text2img = StableDiffusionImageSynthesisWrapper(
+        config_name="sd-text-to-image-service",  # Just a placeholder
+        baseurl=baseurl,
+    )
+    try:
+        kwargs = {"n_iter": n_iter, "width": width, "height": height}
+        if options:
+            kwargs["override_settings"] = options
+
+        res = text2img(prompt=prompt, save_local=False, **kwargs)
+        images = res.image_urls
+
+        # save images to save_dir
+        if images is not None:
+            if save_dir:
+                os.makedirs(save_dir, exist_ok=True)
+                urls_local = []
+                # Obtain the image file names in the url
+                for image in images:
+                    image_name = _DEFAULT_IMAGE_NAME.format(
+                        _get_timestamp(
+                            "%Y%m%d-%H%M%S",
+                        ),
+                        _generate_random_code(),
+                    )
+                    image_path = os.path.abspath(
+                        os.path.join(save_dir, image_name),
+                    )
+                    # Download the image
+                    image.save(image_path)
+                    urls_local.append(image_path)
+                return ServiceResponse(
+                    ServiceExecStatus.SUCCESS,
+                    {"image_urls": urls_local},
+                )
+            else:
+                # Return the default urls
+                file_manager = FileManager.get_instance()
+                urls = [file_manager.save_image(_) for _ in images]
+                return ServiceResponse(
+                    ServiceExecStatus.SUCCESS,
+                    {"image_urls": urls},
+                )
+        else:
+            return ServiceResponse(
+                ServiceExecStatus.ERROR,
+                "Error: Failed to generate images",
+            )
+    except Exception as e:
+        return ServiceResponse(
+            ServiceExecStatus.ERROR,
+            str(e),
+        )