diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index a10e658d..016cdff1 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -19,7 +19,7 @@ repos:
       - id: mypy
         args: []
         additional_dependencies:
-          - pydantic==2.7.4
+          - pydantic==2.9.2
           - types-requests
           - types-pytz
           - types-setuptools
@@ -40,7 +40,7 @@ repos:
           - horde_safety==0.2.3
           - torch==2.3.1
           - ruamel.yaml
-          - horde_engine==2.13.3
-          - horde_sdk==0.14.0
-          - horde_model_reference==0.8.1
+          - horde_engine==2.15.2
+          - horde_sdk==0.14.7
+          - horde_model_reference==0.9.0
           - semver
diff --git a/README.md b/README.md
index 07104c47..0a8b2a5b 100644
--- a/README.md
+++ b/README.md
@@ -102,6 +102,52 @@ You can double click the provided script files below from a file explorer or run
 1. Make a copy of `bridgeData_template.yaml` to `bridgeData.yaml`
 1. Edit `bridgeData.yaml` and follow the instructions within to fill in your details.
 
+#### Suggested settings
+
+Models are loaded as needed, just-in-time. You can offer as many models as you want, **provided you have an SSD, at least 32GB of RAM, and at least 8GB of VRAM (see [Important Info](#important-info))**. Workers with HDDs are not recommended at this time; if you do use an HDD, offer exactly one model. A typical SD1.5 model is around 2GB, while a typical SDXL model is around 7GB. Offering `all` models currently takes around 700GB of disk space, and we commit to keeping that number below 1TB with any future changes.
+
+> Note: We suggest you disable any 'sleep' or reduced power modes for your system while the worker is running.
+
+- If you have a **24gb+ vram card**:
+  ```yaml
+  - safety_on_gpu: true
+  - high_memory_mode: true
+  - high_performance_mode: true
+  - post_process_job_overlap: true
+  - unload_models_from_vram_often: false
+  - max_threads: 1 # If you have Flux/Cascade loaded, otherwise 2 max
+  - queue_size: 2 # You can set this to 3 if you have 64GB or more of RAM
+  - max_batch: 8 # or higher
+  ```
+
+- If you have a **12gb - 16gb card**:
+  ```yaml
+  - safety_on_gpu: true # Consider setting to `false` if offering Cascade or Flux
+  - high_memory_mode: true
+  - moderate_performance_mode: true
+  - unload_models_from_vram_often: false
+  - max_threads: 1
+  - max_batch: 4 # or higher
+  ```
+
+- If you have an **8gb-10gb vram card**:
+  ```yaml
+  - queue_size: 1 # 1 max, **or** only offer Flux
+  - safety_on_gpu: false
+  - max_threads: 1
+  - max_power: 32 # no higher than 32
+  - max_batch: 4 # no higher than 4
+  - allow_post_processing: false # If offering SDXL or Flux, otherwise you may set this to true
+  - allow_sdxl_controlnet: false
+  ```
+
+  - Be sure to close every single VRAM-consuming application you can, and do not use the computer for anything else while the worker is running.
+
+- Workers which have **low-end cards or low performance for other reasons**:
+  ```yaml
+  - extra_slow_worker: true
+  # Gives you considerably more time to finish jobs, but requests will not go to your worker unless the requester opts in (even anonymous users do not use extra_slow_workers by default). You should only consider using this if you have historically had less than 0.3 MPS/S or less than 3000 kudos/hr consistently **and** you are sure the worker is otherwise configured correctly.
+  - limit_max_steps: true
+  # Reduces the maximum total number of steps in a single job you will receive, based on the model baseline.
+  - preload_timeout: 120
+  # Gives you more time to load models off disk.
+  ```
+
+**Note**: Abusing `preload_timeout` can lead to a major loss of kudos and may also lead to maintenance mode, even with `extra_slow_worker: true`.
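+
+These settings go into your `bridgeData.yaml` as plain top-level `key: value` entries (see `bridgeData_template.yaml`). As a rough illustration only, not a complete configuration, a 12gb - 16gb card following the suggestions above might use:
+
+```yaml
+safety_on_gpu: true
+high_memory_mode: true
+moderate_performance_mode: true
+unload_models_from_vram_often: false
+max_threads: 1
+max_batch: 4
+```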
 
 ### Starting/stopping
 
@@ -166,6 +212,32 @@ To update:
 - **Advanced users**: If you do not want to use mamba or you are comfortable with python/venvs, see [README_advanced.md](README_advanced.md).
 1. Continue with [Starting/stopping](#startingstopping) instructions above
 
+# Custom Models
+
+You can host your own image models on the horde, even ones not available in our model reference, but this process is a bit more complex.
+
+To start with, you need to manually request the `customizer` role from the horde team. You can ask for it in the Discord channel. This role is assigned manually to prevent abuse of this feature.
+
+Once you have the customizer role, you need to download the model files you want to host. Place them in any location on your system.
+
+Finally, you need to point your worker to their location and provide some information about them. In your `bridgeData.yaml`, simply add lines like the following:
+
+```yaml
+custom_models:
+  - name: Movable figure model XL
+    baseline: stable_diffusion_xl
+    filepath: /home/db0/projects/CUSTOM_MODELS/PVCStyleModelMovable_beta25Realistic.safetensors
+```
+
+Then add the same `name` to your `models_to_load`.
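+
+For example, to offer the custom model defined above, your `models_to_load` entry might look like this:
+
+```yaml
+models_to_load:
+  - "Movable figure model XL"
+```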
+
+If everything is set up correctly, you should see a `custom_models.json` in your worker directory after the worker starts, and the model should be offered by your worker.
+
+Note that:
+
+* You cannot serve custom models with the same name as any of our regular models.
+* The horde doesn't know your model, so it will treat it as an SD 1.5 model for kudos rewards, and it cannot warn users against using the wrong parameters, such as clip_skip.
+
 # Docker
 
 See [README_advanced.md](README_advanced.md).
diff --git a/bridgeData_template.yaml b/bridgeData_template.yaml
index 0816d880..2beb4f0d 100644
--- a/bridgeData_template.yaml
+++ b/bridgeData_template.yaml
@@ -1,246 +1,286 @@
-## Common for all worker Types
+## Common for all worker types
 
-# The horde url
+# !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!!
+# See also the readme's "Suggested settings" section for recommended settings. !!!
+# !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!!
+
+# The Horde URL. Do not change this unless you are using a custom Horde.
 horde_url: "https://aihorde.net"
 
-# The api_key identifies a unique user in the horde
-# Visit https://stablehorde.net/register to create one before you can join
+# The API key identifies a unique user in the Horde
+# Visit https://aihorde.net/register to create one before you can join
 api_key: "0000000000"
 
-# Put other users whose prompts you want to prioritize.
-# The owner's username is always included so you don't need to add it here if you use the key specified in `api_key` for requests
+# List of usernames whose prompts you want to prioritize.
+# The owner's username is always included, so you don't need to add it here if you use the key specified in `api_key` for requests.
 priority_usernames: []
 
-# The amount of parallel jobs to pick up for the horde.
-# Only high end cards (e.g, 3080 or better) benefit from this setting.
+# The maximum number of parallel jobs to run at the same time.
+# Only high-end cards (e.g., 3080 or better) benefit from this setting.
 # If you have a 20xx or earlier, or a xx60/xx70, do not change this setting from 1.
 max_threads: 1
+# 24GB+ VRAM: 1 (2 max if Flux/Cascade loaded)
+# 12GB-16GB VRAM: 1
+# 8GB-10GB VRAM: 1
 
-# We will keep this many requests in the queue so we can start working as soon as a thread is available
-# This generally should be or 1 or 2. You should never set this higher than 2 if your max_threads is 2.
+# Number of requests to keep in the queue to start working as soon as a thread is available.
+# Generally should be 1 or 2. Never set this higher than 2 if your max_threads is 2.
+# Warning: Increasing this value directly increases system RAM usage significantly.
 queue_size: 1
+# 24GB+ VRAM: 2 (3 if 64GB+ RAM)
+# 8GB-10GB VRAM: 1 (max or only offer flux)
 
-# This will try to pull these many jobs per request and perform batched inference.
-# This is way more optimized than doing them 1 by 1, but is slower.
-# Keep in mind, that the horde will not give your max batch at your max resolution
-# In order to avoid running out of VRAM.
-# The Horde will assume you can fulfil your max batch at HALF you max resolution.
-# So make sure you can generate your max_batch @ max_power/2
-# Over your half max_power, AI Horde will smartly assign only as much batches
-# as it calculates you can achieve. If you start running out of VRAM, reduce
-# max_power or max_batch.
+# Number of jobs to pull per request and perform batched inference.
+# More optimized than doing them one by one but slower.
+# Ensure you can generate your max_batch at half your max_power.
 max_batch: 1
+# 24GB+ VRAM: 8 or higher
+# 12GB-16GB VRAM: 4 or higher
+# 8GB-10GB VRAM: 4 (no higher than 4)
 
-
-# When Enabled will run CLIP model (Checking for potential CSAM or NSFW) on GPU insted of CPU
-# Enable this on cards with 12gb or more VRAM to increase the rate you complete jobs
-# You can enable this on cards with less VRAM if you do not load SD2.0 or SDXL models, and keep your max_power low (<32)
+# Run CLIP model (checking for potential CSAM or NSFW) on GPU instead of CPU.
+# Enable this on cards with 12GB or more VRAM to increase job completion rate.
+# ~1.2GB of VRAM overhead
 safety_on_gpu: false
+# 24GB+ VRAM: true
+# 12GB-16GB VRAM: true (consider false if offering Cascade or Flux)
+# 8GB-10GB VRAM: false
 
-
-# If set to True, this worker will not only pick up jobs where the user has the required kudos upfront.
-# Effectively this will exclude all anonymous accounts, and registered accounts who haven't contributed.
-# Users in priority_usernames and trusted users will bypass this restriction
+# Only pick up jobs where the user has the required kudos upfront.
+# Excludes all anonymous accounts and registered accounts who haven't contributed.
 require_upfront_kudos: false
 
-# If set, this worker will use this civitai API token when downloading any resources from civitai.
-# This is required in order to provide LoRas/TIs (or other resources)
-# which are marked as requiring a civitai token to download.
-#
-# You can get your civitai API Key from https://civitai.com/user/account (look for 'Add API Key')
-#
-# Remove the # from the line below and add your civitai API token to enable this feature.
+# Use this Civitai API token when downloading resources from Civitai.
+# Required for providing LoRas/TIs or other resources marked as requiring a Civitai token.
+# Get your Civitai API Key from https://civitai.com/user/account (look for 'Add API Key').
+# Remove the # from the line below and add your Civitai API token to enable this feature.
 # civitai_api_token:
 
 #######################################
 ## Dreamer (Stable Diffusion Worker) ##
 #######################################
 
-# The worker name to use when running a dreamer instance.
+# Worker name for running a Dreamer instance.
 dreamer_name: "An Awesome Dreamer"
 
-# This is representation of your max resolution (max pixels) supported.
-# The formula is `64 * 64 * 8 * max_power` (giving total pixels)
-# e.g.:
-# 8 = 512x512
-# 18 = 768x768
-# 32 = 1024x1024
-# 50 = 1280x1280
-# ...
-
+# Max resolution (max pixels) supported.
+# Formula: `64 * 64 * 8 * max_power` (total pixels)
+# Examples:
+# 8 = 512x512
+# 18 = 768x768
+# 32 = 1024x1024
+# 50 = 1280x1280
 max_power: 8
-
-# A list of words which you do not want to your worker to accept if they are in the prompt
+# Suggested values:
+# 8GB-10GB VRAM: 32 (no higher than 32)
+# 12GB-16GB VRAM: 32-64 (no higher than 64)
+# 24GB+ VRAM: 64-128 (no higher than 128)
+
+# Use more VRAM on average but reduce time spent loading models.
+high_memory_mode: false
+# Suggested values:
+# 24GB+ VRAM: true
+# 12GB-16GB VRAM: true (consider false if offering Cascade or Flux)
+
+# Fill local queue much faster but may be penalized by the server if you cannot keep up with jobs.
+high_performance_mode: false
+# Suggested values:
+# 24GB+ VRAM: true
+
+# Fill local queue somewhat faster but may be penalized by the server if you cannot keep up with jobs.
+# Overridden by high_performance_mode.
+moderate_performance_mode: false
+# Suggested values:
+# 12GB-16GB VRAM: true
+
+# Start processing the next job before the current job finishes post-processing.
+# Reduces time between jobs but may cause crashes on low RAM or VRAM systems.
+post_process_job_overlap: false
+# Suggested values:
+# 24GB+ VRAM: true
+
+# Aggressively unload models from VRAM when not in use.
+# Should be true for most workers with GPUs with less than 16GB of VRAM.
+unload_models_from_vram_often: true
+# Suggested values:
+# 24GB+ VRAM: false
+# 12GB-16GB VRAM: false
+# 8GB-10GB VRAM: true
+
+# List of words to reject if they appear in the prompt.
 blacklist: []
 
-# If you do not want to serve NSFW images, set this to false.
+# Serve NSFW images if true.
 nsfw: true
 
-# If you want
+# Censor NSFW images if true.
 censor_nsfw: false
 
-# A list of words for which you always want to censor, even if `nsfw` is true.
+# List of words to always censor, even if `nsfw` is true.
 censorlist: []
 
-# Accept jobs which use a user-supplied image.
+# Accept jobs using a user-supplied image.
 allow_img2img: true
 
-# Accept jobs which use a user-supplied image and an inpainting specific model.
+# Accept jobs using a user-supplied image and an inpainting-specific model.
 # Forced to false if `allow_img2img` is false.
 allow_painting: true
 
-# Allow user request which are from behind VPNs.
-# Note: The worker does not directly interact with user IPs - it only interacts with the stablehorde API.
+# Allow user requests from behind VPNs.
+# Note: The worker does not directly interact with user IPs - it only interacts with the StableHorde API.
 allow_unsafe_ip: true
 
-# Allow upscaling, facefixer and other post-generation features to be performed by the worker.
+# Allow upscaling, facefixer, and other post-generation features.
 allow_post_processing: true
+# 8GB-10GB VRAM: false (if offering SDXL or Flux, otherwise true)
 
-# Allow controlnet jobs to be done by this worker.
-# Note: There is additional RAM/VRAM overhead with this option. Low VRAM cards (<6gb) should be cautious to enable this.
+# Allow ControlNet jobs.
+# Note: Additional RAM/VRAM overhead. Low VRAM cards (<6GB) should be cautious.
 allow_controlnet: false
 
-# Allow SDXL jobs with high memory add-ons like controlnet or transparency to be done by this worker.
-# Note: There is significant additional RAM/VRAM overhead with this option. Medium VRAM cards (<12gb) should be cautious to enable this.
-# Note that if this is true, allow_controlnet must also be true
+# Allow SDXL jobs with high memory add-ons like ControlNet or transparency.
+# Note: Significant additional RAM/VRAM overhead. Medium VRAM cards (<12GB) should be cautious.
+# Note that if this is true, allow_controlnet must also be true.
 allow_sdxl_controlnet: false
+# 16GB+ VRAM: true
+# 8GB-10GB VRAM: false
 
-# Allow LoRas to be used. This requires that you have a fast internet connection.
-# LoRas will be downloaded on demand. `max_lora_cache_size` controls how many gigabytes you will keep downloaded.
-# 5gb of preselected LoRas are always downloaded the first time you start the worker with this setting.
+# Allow LoRas to be used. Requires a fast internet connection.
+# LoRas will be downloaded on demand. `max_lora_cache_size` controls how many gigabytes to keep downloaded.
+# 5GB of preselected LoRas are always downloaded the first time you start the worker with this setting.
+# Note that there can be a significant delay when downloading LoRas, causing GPU downtime.
 allow_lora: false
 
-# The number of gigabytes of LoRas too keep cached. This is in addition to the preselected LoRas.
-max_lora_cache_size: 10 # In gigabytes. Min is 10.
+# Delete any unknown LoRas from the loras folder when `download_models.py` is run.
+# Warning: This option will delete any LoRas not in the model reference, including custom LoRas.
+purge_loras_on_download: false
+
+# Number of gigabytes of LoRas to keep cached. Minimum is 10GB.
+max_lora_cache_size: 10
+
+# Set to true if your worker is extraordinarily slow (below 0.1 mps/s).
+# Users can choose to skip extra slow workers when requesting generations, but the job timeout and request expiry timeout are tripled.
+extra_slow_worker: false
+# Low-end cards or low performance: true
 
-# Automatically determine the models which have the highest queue and offer those.
+# Only pick up jobs requesting steps lower than the model's average steps.
+# Useful for slower workers or if you don't want to serve requests with an extraordinary number of steps.
+limit_max_steps: false
+# Low-end cards or low performance: true
+
+# Automatically determine the models with the highest queue and offer those.
 dynamic_models: false # Currently unused in reGen
 
-# The number of models to offer when `dynamic_models` is true.
+# Number of models to offer when `dynamic_models` is true.
 number_of_dynamic_models: 0 # Currently unused in reGen
 
-# If `dynamic_models` is true, the maximum number of models to download automatically for that purpose.
+# Maximum number of models to download automatically for `dynamic_models`.
 max_models_to_download: 10 # Currently unused in reGen
 
-# The frequency (in seconds) to output worker summary stats, such as kudos per hour.
+# Frequency (in seconds) to output worker summary stats, such as kudos per hour.
 # Set to zero to disable stats output completely.
 stats_output_frequency: 30
 
-
-# The location in which stable diffusion ckpt models are stored
+# Location where models are stored.
 cache_home: "./models/"
 
-# The location of the temp directory, also used for the model cache
+# Location of the temp directory, also used for the model cache.
 temp_dir: "./tmp" # Currently unused in reGen
 
-
-# Always download models when required without prompting
+# Always download models when required without prompting.
 always_download: true # Currently unused in reGen
 
-# Disable the terminal GUI, which displays information about the worker and the horde.
+# Disable the terminal GUI, which displays information about the worker and the Horde.
 disable_terminal_ui: false # Currently unused in reGen
 
-
 # Obsolete
 vram_to_leave_free: "80%" # Currently unused in reGen
 
-# The target amount of system ram to keep free.
-# The worker only makes a best effort. You still have to avoid using up too much RAM with other programs.
+# Target amount of system RAM to keep free.
+# The worker only makes a best effort. Avoid using too much RAM with other programs.
 ram_to_leave_free: "80%" # Currently unused in reGen
 
 # Obsolete
 disable_disk_cache: false # Currently unused in reGen
 
-# The models to use.
-# Instead of a model name you may use of any of the following magic constants:
-# "ALL" - means load all possible models. Expect this to take over 1TB of space!
-# "TOP n" - load the top "N" most popular models, use for example, "top 5" or "top 3", etc.
-# "BOTTOM n" - load the bottom "N" models (i.e., the least popular N models) use for example, "bottom 5" or "bottom 3", etc.
-#
-# "ALL SD15 MODELS" - All Stable Diffusion 1.5 models
-# "ALL SD21 MODELS" - All Stable Diffusion 2.0/2.1 models
-# "ALL SDXL MODELS" - All Stable Diffusion XL models
-# "ALL INPAINTING MODELS" - All models marked as being for inpainting
-#
-# "ALL SFW MODELS" - All models marked as being SFW
-# "ALL NSFW MODELS" - All models marked as being NSFW
-#
-# (not currently supported) "ALL