ryankashi committed Sep 2, 2024
2 parents 4e0eff8 + d8b7380 commit e06920f
Showing 79 changed files with 6,744 additions and 632 deletions.
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/config.yml
@@ -1,5 +1,5 @@
blank_issues_enabled: false
contact_links:
- name: WebUI Community Support
-url: https://github.com/AUTOMATIC1111/stable-diffusion-webui/discussions
+url: https://github.com/lshqqytiger/stable-diffusion-webui/discussions
about: Please ask and answer questions here.
2 changes: 2 additions & 0 deletions .gitignore
@@ -8,6 +8,8 @@ __pycache__
/repositories
/venv
/tmp
/cache
/footprints
/model.ckpt
/models/**/*
/GFPGANv1.3.pth
91 changes: 61 additions & 30 deletions README.md
@@ -1,10 +1,13 @@
-# Stable Diffusion web UI
+# Stable Diffusion web UI for AMDGPUs

A web interface for Stable Diffusion, implemented using the Gradio library.

![](screenshot.png)

## Features

[Detailed feature showcase with images](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Features):

- Original txt2img and img2img modes
- One click install and run script (but you still must install python and git)
- Outpainting
@@ -13,45 +16,45 @@ A web interface for Stable Diffusion, implemented using Gradio library.
- Prompt Matrix
- Stable Diffusion Upscale
- Attention, specify parts of text that the model should pay more attention to
- a man in a `((tuxedo))` - will pay more attention to tuxedo
- a man in a `(tuxedo:1.21)` - alternative syntax
- select text and press `Ctrl+Up` or `Ctrl+Down` (or `Command+Up` or `Command+Down` if you're on macOS) to automatically adjust attention to selected text (code contributed by anonymous user)
- Loopback, run img2img processing multiple times
- X/Y/Z plot, a way to draw a 3 dimensional plot of images with different parameters
- Textual Inversion
- have as many embeddings as you want and use any names you like for them
- use multiple embeddings with different numbers of vectors per token
- works with half precision floating point numbers
- train embeddings on 8GB (also reports of 6GB working)
- Extras tab with:
- GFPGAN, neural network that fixes faces
- CodeFormer, face restoration tool as an alternative to GFPGAN
- RealESRGAN, neural network upscaler
- ESRGAN, neural network upscaler with a lot of third party models
- SwinIR and Swin2SR ([see here](https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/2092)), neural network upscalers
- LDSR, Latent diffusion super resolution upscaling
- Resizing aspect ratio options
- Sampling method selection
- Adjust sampler eta values (noise multiplier)
- More advanced noise setting options
- Interrupt processing at any time
- 4GB video card support (also reports of 2GB working)
- Correct seeds for batches
- Live prompt token length validation
- Generation parameters
- parameters you used to generate images are saved with that image
- in PNG chunks for PNG, in EXIF for JPEG
- can drag the image to PNG info tab to restore generation parameters and automatically copy them into UI
- can be disabled in settings
- drag and drop an image/text-parameters to promptbox
- Read Generation Parameters Button, loads parameters in promptbox to UI
- Settings page
- Running arbitrary python code from UI (must run with `--allow-code` to enable)
- Mouseover hints for most UI elements
- Possible to change defaults/min/max/step values for UI elements via text config
- Tiling support, a checkbox to create images that can be tiled like textures
- Progress bar and live image generation preview
- Can use a separate neural network to produce previews with almost no VRAM or compute requirement
- Negative prompt, an extra text field that allows you to list what you don't want to see in the generated image
- Styles, a way to save parts of a prompt and easily apply them via dropdown later
- Variations, a way to generate the same image but with tiny differences
@@ -65,16 +68,16 @@ A web interface for Stable Diffusion, implemented using Gradio library.
- Checkpoint Merger, a tab that allows you to merge up to 3 checkpoints into one
- [Custom scripts](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Custom-Scripts) with many extensions from community
- [Composable-Diffusion](https://energy-based-model.github.io/Compositional-Visual-Generation-with-Composable-Diffusion-Models/), a way to use multiple prompts at once
- separate prompts using uppercase `AND`
- also supports weights for prompts: `a cat :1.2 AND a dog AND a penguin :2.2`
- No token limit for prompts (original stable diffusion lets you use up to 75 tokens)
- DeepDanbooru integration, creates danbooru style tags for anime prompts
- [xformers](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Xformers), major speed increase for select cards: (add `--xformers` to commandline args)
- via extension: [History tab](https://github.com/yfszzx/stable-diffusion-webui-images-browser): view, direct and delete images conveniently within the UI
- Generate forever option
- Training tab
- hypernetworks and embeddings options
- Preprocessing images: cropping, mirroring, autotagging using BLIP or deepdanbooru (for anime)
- Clip skip
- Hypernetworks
- Loras (same as Hypernetworks but more pretty)
@@ -93,8 +96,21 @@ A web interface for Stable Diffusion, implemented using Gradio library.
- Reorder elements in the UI from settings screen
- [Segmind Stable Diffusion](https://huggingface.co/segmind/SSD-1B) support

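The weighted Composable-Diffusion syntax listed above (`a cat :1.2 AND a dog AND a penguin :2.2`) can be illustrated with a small parser sketch. `split_composable_prompt` is a hypothetical helper written for this illustration only; it is not the webui's actual prompt parser.

```python
import re


def split_composable_prompt(prompt: str):
    """Split an uppercase-AND composable prompt into (text, weight) pairs.

    Illustrative sketch only: a trailing `:<number>` on a sub-prompt is read
    as its weight; sub-prompts without one default to weight 1.0.
    """
    pairs = []
    for part in re.split(r"\bAND\b", prompt):
        match = re.search(r":\s*([0-9]*\.?[0-9]+)\s*$", part)
        if match:
            pairs.append((part[: match.start()].strip(), float(match.group(1))))
        else:
            pairs.append((part.strip(), 1.0))
    return pairs


print(split_composable_prompt("a cat :1.2 AND a dog AND a penguin :2.2"))
# [('a cat', 1.2), ('a dog', 1.0), ('a penguin', 2.2)]
```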
## What's different from the upstream repo?

**[DirectML](https://github.com/microsoft/DirectML)** support for every GPU that supports the DirectX 12 API.

**[ZLUDA](https://github.com/vosen/ZLUDA)** support for AMD GPUs.

- `--use-directml`: Use DirectML as a torch backend.
- `--use-zluda`: Use ZLUDA as a torch backend.
- Support [ONNX Runtime](https://github.com/microsoft/onnxruntime).
- Support [Olive](https://github.com/microsoft/Olive) model optimization.

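As a sketch of how these backend flags are typically supplied (assuming the stock `webui-user.sh`/`webui-user.bat` launcher files), set exactly one of them in `COMMANDLINE_ARGS`:

```shell
# webui-user.sh (Linux) -- choose ONE torch backend flag:
export COMMANDLINE_ARGS="--use-zluda"      # AMD GPUs via ZLUDA
# export COMMANDLINE_ARGS="--use-directml" # any DirectX 12 capable GPU via DirectML
```

On Windows the equivalent line in `webui-user.bat` would be `set COMMANDLINE_ARGS=--use-zluda`.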
## Installation and Running

Make sure the required [dependencies](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Dependencies) are met and follow the instructions available for:

- [NVidia](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Install-and-Run-on-NVidia-GPUs) (recommended)
- [AMD](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Install-and-Run-on-AMD-GPUs) GPUs.
- [Intel CPUs, Intel GPUs (both integrated and discrete)](https://github.com/openvinotoolkit/stable-diffusion-webui/wiki/Installation-on-Intel-Silicon) (external wiki page)
@@ -105,19 +121,23 @@ Alternatively, use online services (like Google Colab):
- [List of Online Services](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Online-Services)

### Installation on Windows 10/11 with NVidia-GPUs using release package

1. Download `sd.webui.zip` from [v1.0.0-pre](https://github.com/AUTOMATIC1111/stable-diffusion-webui/releases/tag/v1.0.0-pre) and extract its contents.
2. Run `update.bat`.
3. Run `run.bat`.
> For more details see [Install-and-Run-on-NVidia-GPUs](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Install-and-Run-on-NVidia-GPUs)
### Automatic Installation on Windows

1. Install [Python 3.10.6](https://www.python.org/downloads/release/python-3106/) (newer versions of Python do not support torch), checking "Add Python to PATH".
2. Install [git](https://git-scm.com/download/win).
-3. Download the stable-diffusion-webui repository, for example by running `git clone https://github.com/AUTOMATIC1111/stable-diffusion-webui.git`.
+3. Download the stable-diffusion-webui-amdgpu repository, for example by running `git clone https://github.com/lshqqytiger/stable-diffusion-webui-amdgpu.git`.
4. Run `webui-user.bat` from Windows Explorer as a normal, non-administrator user.

### Automatic Installation on Linux

1. Install the dependencies:

```bash
# Debian-based:
sudo apt install wget git python3 python3-venv libgl1 libglib2.0-0
@@ -128,7 +148,9 @@ sudo zypper install wget git python3 libtcmalloc4 libglvnd
# Arch-based:
sudo pacman -S wget git python3
```

If your system is very new, you need to install python3.11 or python3.10:

```bash
# Ubuntu 24.04
sudo add-apt-repository ppa:deadsnakes/ppa
@@ -145,22 +167,28 @@ export python_cmd="python3.11"
# or in webui-user.sh
python_cmd="python3.11"
```

2. Navigate to the directory in which you would like the webui to be installed and execute the following command:

```bash
-wget -q https://raw.githubusercontent.com/AUTOMATIC1111/stable-diffusion-webui/master/webui.sh
+wget -q https://raw.githubusercontent.com/lshqqytiger/stable-diffusion-webui-amdgpu/master/webui.sh
```

Or just clone the repo wherever you want:

```bash
-git clone https://github.com/AUTOMATIC1111/stable-diffusion-webui
+git clone https://github.com/lshqqytiger/stable-diffusion-webui-amdgpu
```

3. Run `webui.sh`.
4. Check `webui-user.sh` for options.

### Installation on Apple Silicon

Find the instructions [here](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Installation-on-Apple-Silicon).

## Contributing

Here's how to add code to this repo: [Contributing](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Contributing)

## Documentation
@@ -170,6 +198,7 @@ The documentation was moved from this README over to the project's [wiki](https:
For the purposes of getting Google and other search engines to crawl the wiki, here's a link to the (not for humans) [crawlable wiki](https://github-wiki-see.page/m/AUTOMATIC1111/stable-diffusion-webui/wiki).

## Credits

Licenses for borrowed code can be found in `Settings -> Licenses` screen, and also in `html/licenses.html` file.

- Stable Diffusion - https://github.com/Stability-AI/stablediffusion, https://github.com/CompVis/taming-transformers, https://github.com/mcmonkey4eva/sd3-ref
@@ -202,4 +231,6 @@ Licenses for borrowed code can be found in `Settings -> Licenses` screen, and also in `html/licenses.html` file.
- Restart sampling - lambertae - https://github.com/Newbeeer/diffusion_restart_sampling
- Hypertile - tfernd - https://github.com/tfernd/HyperTile
- Initial Gradio script - posted on 4chan by an Anonymous user. Thank you Anonymous user.
- Olive - https://github.com/microsoft/Olive
- kohya-ss/sd-scripts - https://github.com/kohya-ss/sd-scripts
- (You)
134 changes: 134 additions & 0 deletions configs/olive/sd/text_encoder.json
@@ -0,0 +1,134 @@
{
"input_model": {
"type": "PyTorchModel",
"config": {
"model_path": "",
"model_loader": "text_encoder_load",
"model_script": "modules/olive_script.py",
"io_config": {
"input_names": ["input_ids"],
"output_names": ["last_hidden_state", "pooler_output"],
"dynamic_axes": { "input_ids": { "0": "batch", "1": "sequence" } }
},
"dummy_inputs_func": "text_encoder_conversion_inputs"
}
},
"systems": {
"local_system": {
"type": "LocalSystem",
"config": {
"accelerators": [
{
"device": "gpu",
"execution_providers": ["DmlExecutionProvider"]
}
]
}
}
},
"evaluators": {
"common_evaluator": {
"metrics": [
{
"name": "latency",
"type": "latency",
"sub_types": [{ "name": "avg" }],
"user_config": {
"user_script": "modules/olive_script.py",
"dataloader_func": "text_encoder_data_loader",
"batch_size": 1
}
}
]
}
},
"passes": {
"optimize_CPUExecutionProvider": {
"type": "OrtTransformersOptimization",
"config": {
"model_type": "clip",
"opt_level": 0,
"float16": false,
"use_gpu": false,
"keep_io_types": false
}
},
"optimize_DmlExecutionProvider": {
"type": "OrtTransformersOptimization",
"config": {
"model_type": "clip",
"opt_level": 0,
"float16": true,
"use_gpu": true,
"keep_io_types": false,
"optimization_options": {
"enable_gelu": true,
"enable_layer_norm": true,
"enable_attention": true,
"use_multi_head_attention": true,
"enable_skip_layer_norm": false,
"enable_embed_layer_norm": true,
"enable_bias_skip_layer_norm": false,
"enable_bias_gelu": true,
"enable_gelu_approximation": false,
"enable_qordered_matmul": false,
"enable_shape_inference": true,
"enable_gemm_fast_gelu": false,
"enable_nhwc_conv": false,
"enable_group_norm": true,
"enable_bias_splitgelu": false,
"enable_packed_qkv": true,
"enable_packed_kv": true,
"enable_bias_add": false,
"group_norm_channels_last": false
},
"force_fp32_ops": ["RandomNormalLike"],
"force_fp16_inputs": {
"GroupNorm": [0, 1, 2]
}
}
},
"optimize_CUDAExecutionProvider": {
"type": "OrtTransformersOptimization",
"config": {
"model_type": "clip",
"opt_level": 0,
"float16": true,
"use_gpu": true,
"keep_io_types": false
}
},
"optimize_ROCMExecutionProvider": {
"type": "OrtTransformersOptimization",
"config": {
"model_type": "clip",
"opt_level": 0,
"float16": true,
"use_gpu": true,
"keep_io_types": false
}
},
"quantization": {
"type": "OnnxDynamicQuantization",
"disable_search": true,
"config": {
"save_as_external_data": false,
"all_tensors_to_one_file": true,
"per_channel": false,
"reduce_range": false,
"MatMulConstBOnly": true
}
}
},
"pass_flows": [["optimize_AutoExecutionProvider"]],
"engine": {
"log_severity_level": 0,
"evaluator": "common_evaluator",
"evaluate_input_model": false,
"host": "local_system",
"target": "local_system",
"cache_dir": "cache",
"output_name": "text_encoder",
"output_dir": "footprints"
}
}
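Note that `pass_flows` names `optimize_AutoExecutionProvider`, which does not appear among the passes defined above (only `optimize_DmlExecutionProvider`, `optimize_CUDAExecutionProvider`, `optimize_ROCMExecutionProvider`, and `optimize_CPUExecutionProvider` do). One plausible reading, shown in this assumed sketch (not the repository's actual `modules/olive_script.py` logic), is that the placeholder is substituted with the active execution provider's name before the config is handed to Olive:

```python
import json


def resolve_pass_flows(config: dict, execution_provider: str) -> dict:
    """Replace the AutoExecutionProvider placeholder with the active EP name.

    Hypothetical helper for illustration; the real resolution step may differ.
    """
    config["pass_flows"] = [
        [p.replace("AutoExecutionProvider", execution_provider) for p in flow]
        for flow in config["pass_flows"]
    ]
    return config


cfg = json.loads('{"pass_flows": [["optimize_AutoExecutionProvider"]]}')
cfg = resolve_pass_flows(cfg, "DmlExecutionProvider")
print(cfg["pass_flows"])
# [['optimize_DmlExecutionProvider']]
```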
