diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index a10e658d..016cdff1 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -19,7 +19,7 @@ repos:
       - id: mypy
         args: []
         additional_dependencies:
-          - pydantic==2.7.4
+          - pydantic==2.9.2
           - types-requests
           - types-pytz
           - types-setuptools
@@ -40,7 +40,7 @@ repos:
           - horde_safety==0.2.3
           - torch==2.3.1
           - ruamel.yaml
-          - horde_engine==2.13.3
-          - horde_sdk==0.14.0
-          - horde_model_reference==0.8.1
+          - horde_engine==2.15.2
+          - horde_sdk==0.14.7
+          - horde_model_reference==0.9.0
           - semver
diff --git a/README.md b/README.md
index 07104c47..0a8b2a5b 100644
--- a/README.md
+++ b/README.md
@@ -102,6 +102,52 @@ You can double click the provided script files below from a file explorer or run
 1. Make a copy of `bridgeData_template.yaml` to `bridgeData.yaml`
 1. Edit `bridgeData.yaml` and follow the instructions within to fill in your details.
 
+#### Suggested settings
+
+Models are loaded as needed, just-in-time. You can offer as many models as you want, **provided you have an SSD, at least 32GB of RAM, and at least 8GB of VRAM (see [Important Info](#important-info))**. Workers with HDDs are not recommended at this time; if you do use an HDD, offer exactly one model. A typical SD1.5 model is around 2GB, while a typical SDXL model is around 7GB. Offering `all` models currently takes around 700GB of disk space, and we commit to keeping that number below 1TB with any future changes.
+
+> Note: We suggest you disable any 'sleep' or reduced power modes for your system while the worker is running.
+
+- If you have a **24gb+ vram card**:
+  ```yaml
+  - safety_on_gpu: true
+  - high_memory_mode: true
+  - high_performance_mode: true
+  - post_process_job_overlap: true
+  - unload_models_from_vram_often: false
+  - max_threads: 1 # If you have Flux/Cascade loaded, otherwise 2 max
+  - queue_size: 2 # You can set this to 3 if you have 64GB or more of RAM
+  - max_batch: 8 # or higher
+  ```
+
+- If you have a **12gb - 16gb card**:
+  ```yaml
+  - safety_on_gpu: true # Consider setting to `false` if offering Cascade or Flux
+  - high_memory_mode: true
+  - moderate_performance_mode: true
+  - unload_models_from_vram_often: false
+  - max_threads: 1
+  - max_batch: 4 # or higher
+  ```
+
+- If you have an **8gb-10gb vram card**:
+  ```yaml
+  - queue_size: 1 # 1 max, **or** only offer Flux
+  - safety_on_gpu: false
+  - max_threads: 1
+  - max_power: 32 # no higher than 32
+  - max_batch: 4 # no higher than 4
+  - allow_post_processing: false # If offering SDXL or Flux, otherwise you may set this to true
+  - allow_sdxl_controlnet: false
+  ```
+
+  - Be sure to close every single VRAM-consuming application you can, and do not use the computer for anything else while the worker is running.
+
+- Workers which have **low-end cards or low performance for other reasons**:
+  ```yaml
+  - extra_slow_worker: true
+  # Gives you considerably more time to finish jobs, but requests will not go to your worker unless the requester opts in (even anonymous users do not use extra_slow_workers by default). You should only consider using this if you have historically had less than 0.3 MPS/S or less than 3000 kudos/hr consistently **and** you are sure the worker is otherwise configured correctly.
+  - limit_max_steps: true
+  # Reduces the maximum total number of steps in a single job you will receive, based on the model baseline.
+  - preload_timeout: 120
+  # Gives you more time to load models off disk.
+  ```
+
+**Note**: Abusing `preload_timeout` can lead to a major loss of kudos and may also lead to maintenance mode, even with `extra_slow_worker: true`.
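+
+These settings go into your `bridgeData.yaml` as plain top-level `key: value` entries (see `bridgeData_template.yaml`). As a rough illustration only, not a complete configuration, a 12gb - 16gb card following the suggestions above might use:
+
+```yaml
+safety_on_gpu: true
+high_memory_mode: true
+moderate_performance_mode: true
+unload_models_from_vram_often: false
+max_threads: 1
+max_batch: 4
+```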
 
 ### Starting/stopping
 
@@ -166,6 +212,32 @@ To update:
 - **Advanced users**: If you do not want to use mamba or you are comfortable with python/venvs, see [README_advanced.md](README_advanced.md).
 1. Continue with [Starting/stopping](#startingstopping) instructions above
 
+# Custom Models
+
+You can host your own image models on the horde, even ones not available in our model reference, but this process is a bit more complex.
+
+To start with, you need to manually request the `customizer` role from the horde team. You can ask for it in the Discord channel. This role is assigned manually to prevent abuse of this feature.
+
+Once you have the customizer role, you need to download the model files you want to host. Place them in any location on your system.
+
+Finally, you need to point your worker to their location and provide some information about them. In your `bridgeData.yaml`, simply add lines like the following:
+
+```yaml
+custom_models:
+  - name: Movable figure model XL
+    baseline: stable_diffusion_xl
+    filepath: /home/db0/projects/CUSTOM_MODELS/PVCStyleModelMovable_beta25Realistic.safetensors
+```
+
+Then add the same `name` to your `models_to_load`.
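+
+For example, to offer the custom model defined above, your `models_to_load` entry might look like this:
+
+```yaml
+models_to_load:
+  - "Movable figure model XL"
+```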
+
+If everything is set up correctly, you should see a `custom_models.json` in your worker directory after the worker starts, and the model should be offered by your worker.
+
+Note that:
+
+* You cannot serve custom models with the same name as any of our regular models.
+* The horde doesn't know your model, so it will treat it as an SD 1.5 model for kudos rewards, and it cannot warn users against using the wrong parameters, such as clip_skip.
+
 # Docker
 
 See [README_advanced.md](README_advanced.md).
diff --git a/bridgeData_template.yaml b/bridgeData_template.yaml
index 0816d880..2beb4f0d 100644
--- a/bridgeData_template.yaml
+++ b/bridgeData_template.yaml
@@ -1,246 +1,286 @@
-## Common for all worker Types
+## Common for all worker types
 
-# The horde url
+# !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!!
+# See also the readme's "Suggested settings" section for recommended settings. !!!
+# !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!! !!!
+
+# The Horde URL. Do not change this unless you are using a custom Horde.
 horde_url: "https://aihorde.net"
 
-# The api_key identifies a unique user in the horde
-# Visit https://stablehorde.net/register to create one before you can join
+# The API key identifies a unique user in the Horde
+# Visit https://aihorde.net/register to create one before you can join
 api_key: "0000000000"
 
-# Put other users whose prompts you want to prioritize.
-# The owner's username is always included so you don't need to add it here if you use the key specified in `api_key` for requests
+# List of usernames whose prompts you want to prioritize.
+# The owner's username is always included, so you don't need to add it here if you use the key specified in `api_key` for requests.
 priority_usernames: []
 
-# The amount of parallel jobs to pick up for the horde.
-# Only high end cards (e.g, 3080 or better) benefit from this setting.
+# The maximum number of parallel jobs to run at the same time.
+# Only high-end cards (e.g., 3080 or better) benefit from this setting.
 # If you have a 20xx or earlier, or a xx60/xx70, do not change this setting from 1.
 max_threads: 1
+# 24GB+ VRAM: 1 (2 max if Flux/Cascade loaded)
+# 12GB-16GB VRAM: 1
+# 8GB-10GB VRAM: 1
 
-# We will keep this many requests in the queue so we can start working as soon as a thread is available
-# This generally should be or 1 or 2. You should never set this higher than 2 if your max_threads is 2.
+# Number of requests to keep in the queue to start working as soon as a thread is available.
+# Generally should be 1 or 2. Never set this higher than 2 if your max_threads is 2.
+# Warning: Increasing this value directly increases system RAM usage significantly.
 queue_size: 1
+# 24GB+ VRAM: 2 (3 if 64GB+ RAM)
+# 8GB-10GB VRAM: 1 (max or only offer flux)
 
-# This will try to pull these many jobs per request and perform batched inference.
-# This is way more optimized than doing them 1 by 1, but is slower.
-# Keep in mind, that the horde will not give your max batch at your max resolution
-# In order to avoid running out of VRAM.
-# The Horde will assume you can fulfil your max batch at HALF you max resolution.
-# So make sure you can generate your max_batch @ max_power/2
-# Over your half max_power, AI Horde will smartly assign only as much batches
-# as it calculates you can achieve. If you start running out of VRAM, reduce
-# max_power or max_batch.
+# Number of jobs to pull per request and perform batched inference.
+# More optimized than doing them one by one but slower.
+# Ensure you can generate your max_batch at half your max_power.
 max_batch: 1
+# 24GB+ VRAM: 8 or higher
+# 12GB-16GB VRAM: 4 or higher
+# 8GB-10GB VRAM: 4 (no higher than 4)
 
-
-# When Enabled will run CLIP model (Checking for potential CSAM or NSFW) on GPU insted of CPU
-# Enable this on cards with 12gb or more VRAM to increase the rate you complete jobs
-# You can enable this on cards with less VRAM if you do not load SD2.0 or SDXL models, and keep your max_power low (<32)
+# Run CLIP model (checking for potential CSAM or NSFW) on GPU instead of CPU.
+# Enable this on cards with 12GB or more VRAM to increase job completion rate.
+# ~1.2GB of VRAM overhead
 safety_on_gpu: false
+# 24GB+ VRAM: true
+# 12GB-16GB VRAM: true (consider false if offering Cascade or Flux)
+# 8GB-10GB VRAM: false
 
-
-# If set to True, this worker will not only pick up jobs where the user has the required kudos upfront.
-# Effectively this will exclude all anonymous accounts, and registered accounts who haven't contributed.
-# Users in priority_usernames and trusted users will bypass this restriction
+# Only pick up jobs where the user has the required kudos upfront.
+# Excludes all anonymous accounts and registered accounts who haven't contributed.
 require_upfront_kudos: false
 
-# If set, this worker will use this civitai API token when downloading any resources from civitai.
-# This is required in order to provide LoRas/TIs (or other resources)
-# which are marked as requiring a civitai token to download.
-#
-# You can get your civitai API Key from https://civitai.com/user/account (look for 'Add API Key')
-#
-# Remove the # from the line below and add your civitai API token to enable this feature.
+# Use this Civitai API token when downloading resources from Civitai.
+# Required for providing LoRas/TIs or other resources marked as requiring a Civitai token.
+# Get your Civitai API Key from https://civitai.com/user/account (look for 'Add API Key').
+# Remove the # from the line below and add your Civitai API token to enable this feature.
 # civitai_api_token:
 
 #######################################
 ## Dreamer (Stable Diffusion Worker) ##
 #######################################
 
-# The worker name to use when running a dreamer instance.
+# Worker name for running a Dreamer instance.
 dreamer_name: "An Awesome Dreamer"
 
-# This is representation of your max resolution (max pixels) supported.
-# The formula is `64 * 64 * 8 * max_power` (giving total pixels)
-# e.g.:
-# 8 = 512x512
-# 18 = 768x768
-# 32 = 1024x1024
-# 50 = 1280x1280
-# ...
-
+# Max resolution (max pixels) supported.
+# Formula: `64 * 64 * 8 * max_power` (total pixels)
+# Examples:
+# 8 = 512x512
+# 18 = 768x768
+# 32 = 1024x1024
+# 50 = 1280x1280
 max_power: 8
-
-# A list of words which you do not want to your worker to accept if they are in the prompt
+# Suggested values:
+# 8GB-10GB VRAM: 32 (no higher than 32)
+# 12GB-16GB VRAM: 32-64 (no higher than 64)
+# 24GB+ VRAM: 64-128 (no higher than 128)
+
+# Use more VRAM on average but reduce time spent loading models.
+high_memory_mode: false
+# Suggested values:
+# 24GB+ VRAM: true
+# 12GB-16GB VRAM: true (consider false if offering Cascade or Flux)
+
+# Fill local queue much faster but may be penalized by the server if you cannot keep up with jobs.
+high_performance_mode: false
+# Suggested values:
+# 24GB+ VRAM: true
+
+# Fill local queue somewhat faster but may be penalized by the server if you cannot keep up with jobs.
+# Overridden by high_performance_mode.
+moderate_performance_mode: false
+# Suggested values:
+# 12GB-16GB VRAM: true
+
+# Start processing the next job before the current job finishes post-processing.
+# Reduces time between jobs but may cause crashes on low RAM or VRAM systems.
+post_process_job_overlap: false
+# Suggested values:
+# 24GB+ VRAM: true
+
+# Aggressively unload models from VRAM when not in use.
+# Should be true for most workers with GPUs with less than 16GB of VRAM.
+unload_models_from_vram_often: true
+# Suggested values:
+# 24GB+ VRAM: false
+# 12GB-16GB VRAM: false
+# 8GB-10GB VRAM: true
+
+# List of words to reject if they appear in the prompt.
 blacklist: []
 
-# If you do not want to serve NSFW images, set this to false.
+# Serve NSFW images if true.
 nsfw: true
 
-# If you want
+# Censor NSFW images if true.
 censor_nsfw: false
 
-# A list of words for which you always want to censor, even if `nsfw` is true.
+# List of words to always censor, even if `nsfw` is true.
 censorlist: []
 
-# Accept jobs which use a user-supplied image.
+# Accept jobs using a user-supplied image.
 allow_img2img: true
 
-# Accept jobs which use a user-supplied image and an inpainting specific model.
+# Accept jobs using a user-supplied image and an inpainting-specific model.
 # Forced to false if `allow_img2img` is false.
 allow_painting: true
 
-# Allow user request which are from behind VPNs.
-# Note: The worker does not directly interact with user IPs - it only interacts with the stablehorde API.
+# Allow user requests from behind VPNs.
+# Note: The worker does not directly interact with user IPs - it only interacts with the StableHorde API.
 allow_unsafe_ip: true
 
-# Allow upscaling, facefixer and other post-generation features to be performed by the worker.
+# Allow upscaling, facefixer, and other post-generation features.
 allow_post_processing: true
+# 8GB-10GB VRAM: false (if offering SDXL or Flux, otherwise true)
 
-# Allow controlnet jobs to be done by this worker.
-# Note: There is additional RAM/VRAM overhead with this option. Low VRAM cards (<6gb) should be cautious to enable this.
+# Allow ControlNet jobs.
+# Note: Additional RAM/VRAM overhead. Low VRAM cards (<6GB) should be cautious.
 allow_controlnet: false
 
-# Allow SDXL jobs with high memory add-ons like controlnet or transparency to be done by this worker.
-# Note: There is significant additional RAM/VRAM overhead with this option. Medium VRAM cards (<12gb) should be cautious to enable this.
-# Note that if this is true, allow_controlnet must also be true
+# Allow SDXL jobs with high memory add-ons like ControlNet or transparency.
+# Note: Significant additional RAM/VRAM overhead. Medium VRAM cards (<12GB) should be cautious.
+# Note that if this is true, allow_controlnet must also be true.
 allow_sdxl_controlnet: false
+# 16GB+ VRAM: true
+# 8GB-10GB VRAM: false
 
-# Allow LoRas to be used. This requires that you have a fast internet connection.
-# LoRas will be downloaded on demand. `max_lora_cache_size` controls how many gigabytes you will keep downloaded.
-# 5gb of preselected LoRas are always downloaded the first time you start the worker with this setting.
+# Allow LoRas to be used. Requires a fast internet connection.
+# LoRas will be downloaded on demand. `max_lora_cache_size` controls how many gigabytes to keep downloaded.
+# 5GB of preselected LoRas are always downloaded the first time you start the worker with this setting.
+# Note that there can be a significant delay when downloading LoRas, causing GPU downtime.
 allow_lora: false
 
-# The number of gigabytes of LoRas too keep cached. This is in addition to the preselected LoRas.
-max_lora_cache_size: 10 # In gigabytes. Min is 10.
+# Delete any unknown LoRas from the loras folder when `download_models.py` is run.
+# Warning: This option will delete any LoRas not in the model reference, including custom LoRas.
+purge_loras_on_download: false
+
+# Number of gigabytes of LoRas to keep cached. Minimum is 10GB.
+max_lora_cache_size: 10
+
+# Set to true if your worker is extraordinarily slow (below 0.1 mps/s).
+# Users can choose to skip extra slow workers when requesting generations, but the job timeout and request expiry timeout are tripled.
+extra_slow_worker: false
+# Low-end cards or low performance: true
 
-# Automatically determine the models which have the highest queue and offer those.
+# Only pick up jobs requesting steps lower than the model's average steps.
+# Useful for slower workers or if you don't want to serve requests with an extraordinary number of steps.
+limit_max_steps: false
+# Low-end cards or low performance: true
+
+# Automatically determine the models with the highest queue and offer those.
 dynamic_models: false # Currently unused in reGen
 
-# The number of models to offer when `dynamic_models` is true.
+# Number of models to offer when `dynamic_models` is true.
 number_of_dynamic_models: 0 # Currently unused in reGen
 
-# If `dynamic_models` is true, the maximum number of models to download automatically for that purpose.
+# Maximum number of models to download automatically for `dynamic_models`.
 max_models_to_download: 10 # Currently unused in reGen
 
-# The frequency (in seconds) to output worker summary stats, such as kudos per hour.
+# Frequency (in seconds) to output worker summary stats, such as kudos per hour.
 # Set to zero to disable stats output completely.
 stats_output_frequency: 30
 
-
-# The location in which stable diffusion ckpt models are stored
+# Location where models are stored.
 cache_home: "./models/"
 
-# The location of the temp directory, also used for the model cache
+# Location of the temp directory, also used for the model cache.
 temp_dir: "./tmp" # Currently unused in reGen
 
-
-# Always download models when required without prompting
+# Always download models when required without prompting.
 always_download: true # Currently unused in reGen
 
-# Disable the terminal GUI, which displays information about the worker and the horde.
+# Disable the terminal GUI, which displays information about the worker and the Horde.
 disable_terminal_ui: false # Currently unused in reGen
 
-
 # Obsolete
 vram_to_leave_free: "80%" # Currently unused in reGen
 
-# The target amount of system ram to keep free.
-# The worker only makes a best effort. You still have to avoid using up too much RAM with other programs.
+# Target amount of system RAM to keep free.
+# The worker only makes a best effort. Avoid using too much RAM with other programs.
 ram_to_leave_free: "80%" # Currently unused in reGen
 
 # Obsolete
 disable_disk_cache: false # Currently unused in reGen
 
-# The models to use.
-# Instead of a model name you may use of any of the following magic constants:
-# "ALL" - means load all possible models. Expect this to take over 1TB of space!
-# "TOP n" - load the top "N" most popular models, use for example, "top 5" or "top 3", etc.
-# "BOTTOM n" - load the bottom "N" models (i.e., the least popular N models) use for example, "bottom 5" or "bottom 3", etc.
-#
-# "ALL SD15 MODELS" - All Stable Diffusion 1.5 models
-# "ALL SD21 MODELS" - All Stable Diffusion 2.0/2.1 models
-# "ALL SDXL MODELS" - All Stable Diffusion XL models
-# "ALL INPAINTING MODELS" - All models marked as being for inpainting
-#
-# "ALL SFW MODELS" - All models marked as being SFW
-# "ALL NSFW MODELS" - All models marked as being NSFW
-#
-# (not currently supported) "ALL