
Image Annotation Moderation and FLUX Model Integration #65

Merged
53 commits, merged Oct 16, 2024
Changes shown from 37 of the 53 commits.

Commits
2f6160e
adding blackforest labs models
dylanuys Aug 12, 2024
2adec09
temporarily disabling real image challenges to maximize data production
dylanuys Aug 12, 2024
c81a4a7
adding cpu offload
dylanuys Aug 12, 2024
b3b4f60
adding arg for flux generation
dylanuys Aug 12, 2024
aaaedd5
calling flux generation with new args in constants.py
dylanuys Aug 12, 2024
8af1cc6
pulling in latest from main
dylanuys Aug 20, 2024
e5e9f60
constants update for flux mirror gen
dylanuys Aug 20, 2024
fa19189
adding cache dir to model loads
dylanuys Aug 20, 2024
4bec465
temp notebooks for flux tests
dylanuys Aug 20, 2024
51d39f8
typo
dylanuys Aug 20, 2024
31ccd59
updating guidance_scale
dylanuys Aug 21, 2024
02dcea8
Added Meta's Llama-3.1-8B-Instruct model for annotation moderation.
benliang99 Aug 22, 2024
f6de46e
Upgraded BLIP2 model to 6.7B param version
benliang99 Aug 23, 2024
ddc06ad
Replaced Llama-3.1-8B-Instruct model with Unsloth's ungated version.
benliang99 Aug 23, 2024
e128200
Updated requirements with Unsloth model dependency.
benliang99 Aug 23, 2024
cf16da4
Clear image generation annotation models from GPU after generating im…
benliang99 Aug 24, 2024
bdb3693
Fixed usage of generation args, updated image generation helper funct…
benliang99 Aug 24, 2024
d6199a9
Added gpu-specific diffusion model loading for parallelization
benliang99 Sep 9, 2024
cd5b897
Fixed docstring indent
benliang99 Sep 9, 2024
aed5346
Added gpu specification for synthetic image generation
benliang99 Sep 9, 2024
160dd66
Added dataset gen GPU delegation shell script and SDXL to constants
benliang99 Sep 17, 2024
2f7fab5
Removed HF token
benliang99 Sep 17, 2024
7ea024d
Fixed specification of torch.dtype for SDXL
benliang99 Sep 17, 2024
56b2cec
Reformatting empty lines, added stress test script.
benliang99 Sep 18, 2024
46b2cdd
Added temporary stress testing prints
benliang99 Sep 18, 2024
f7ed8f3
Added all diffusion models and updated generate args
benliang99 Sep 18, 2024
585f42f
Added float16 tensor tflop specs metric
benliang99 Sep 19, 2024
3e5128f
Set default GPU for diffusion models to cuda if no gpu_id specified. …
benliang99 Sep 19, 2024
46fcaef
Merged with testnet
benliang99 Sep 19, 2024
c1e2ac9
Reverted merge
benliang99 Sep 19, 2024
75981ca
Merge branch 'main' into llm-annotation-moderation
benliang99 Sep 23, 2024
949df9b
Merged Main, Testnet into branch
benliang99 Sep 23, 2024
be629c4
Removed unnecessary imports
benliang99 Sep 23, 2024
b220f12
Added diffusion model verification/loading at the end of validator se…
benliang99 Sep 23, 2024
6285d92
Added validator model verification script for initial model download …
benliang99 Sep 23, 2024
8c51b22
Fixed typo.
benliang99 Sep 23, 2024
2fd800b
Added check for model presence in cache
benliang99 Sep 23, 2024
8e3c6bc
Reinstated necessary pipeline imports
benliang99 Sep 23, 2024
4cb39cc
PEP8 class spacing
benliang99 Sep 23, 2024
8ea2b54
Notebook cleanup.
benliang99 Sep 23, 2024
a4204f8
ImageAnnotationGenerator doc/class strings
benliang99 Sep 23, 2024
4aaf558
SyntheticImageGenerator docstring updates
benliang99 Sep 24, 2024
a90c2c6
Merge with testnet
benliang99 Oct 2, 2024
1c59491
Removed legacy (unused) directory
benliang99 Oct 2, 2024
b31b17c
Renamed and moved validator model verification to bitmind/validator/,…
benliang99 Oct 2, 2024
6cadbbd
Added validator-specific unit testing
benliang99 Oct 3, 2024
e817257
Added unit test for generating images. Added PEP8 docstrings.
benliang99 Oct 3, 2024
0625df5
Merge testnet into llm-annotation-moderation
benliang99 Oct 15, 2024
9b4c612
Correct WANDB_PROJECT constant
benliang99 Oct 15, 2024
fc1d817
Update setup fields
benliang99 Oct 15, 2024
17ee8f5
Fixed axon undefined
benliang99 Oct 15, 2024
fa90608
Added conditional cuda usage
benliang99 Oct 15, 2024
d2cf985
Merge remote-tracking branch 'origin/testnet' into llm-annotation-mod…
benliang99 Oct 16, 2024
1 change: 1 addition & 0 deletions autoupdate_validator_steps.sh
@@ -7,4 +7,5 @@
echo $CONDA_PREFIX
$CONDA_PREFIX/bin/pip install -e .
$CONDA_PREFIX/bin/python bitmind/download_data.py
$CONDA_PREFIX/bin/python verify_validator_models.py
echo "Autoupdate steps complete :)"
46 changes: 41 additions & 5 deletions bitmind/constants.py
@@ -1,14 +1,16 @@
import os
import torch


WANDB_PROJECT = 'bitmind-subnet'
WANDB_PROJECT = 'bitmind'
WANDB_ENTITY = 'bitmindai'

DATASET_META = {
"real": [
{"path": "bitmind/open-images-v7", "create_splits": False},
{"path": "bitmind/ffhq-256", "create_splits": False},
{"path": "bitmind/celeb-a-hq", "create_splits": False}
{"path": "bitmind/celeb-a-hq", "create_splits": False},
{"path": "bitmind/MS-COCO-unique-256", "create_splits": False}
],
"fake": [
{"path": "bitmind/realvis-xl", "create_splits": False},
@@ -39,19 +41,36 @@
{
"path": "stabilityai/stable-diffusion-xl-base-1.0",
"use_safetensors": True,
"torch_dtype": torch.float16,
Review comment (PR author): torch_dtype is no longer hardcoded, so it must be specified in the pipeline / DIFFUSER_ARGS.

"variant": "fp16",
"pipeline": "StableDiffusionXLPipeline"
},
{
"path": "SG161222/RealVisXL_V4.0",
"use_safetensors": True,
"torch_dtype": torch.float16,
"variant": "fp16",
"pipeline": "StableDiffusionXLPipeline"
},
{
"path": "Corcelio/mobius",
"use_safetensors": True,
"torch_dtype": torch.float16,
"pipeline": "StableDiffusionXLPipeline"
},
{
"path": 'black-forest-labs/FLUX.1-dev',
"use_safetensors": True,
"torch_dtype": torch.bfloat16,
"generate_args": {
Review comment (PR author): Arguments decided based on experimental generation latencies.

"guidance_scale": 2,
"num_inference_steps": {"min": 50, "max": 125},
"generator": torch.Generator("cuda"),
"height": [512, 768],
"width": [512, 768]
},
"enable_cpu_offload": False,
"pipeline": "FluxPipeline"
}
]
}
@@ -60,16 +79,30 @@

TARGET_IMAGE_SIZE = (256, 256)

PROMPT_TYPES = ('random', 'annotation')
PROMPT_TYPES = ('random', 'annotation', 'none')

PROMPT_GENERATOR_ARGS = {
m['model']: m for m in VALIDATOR_MODEL_META['prompt_generators']
}

PROMPT_GENERATOR_NAMES = list(PROMPT_GENERATOR_ARGS.keys())

# args for .from_pretrained
DIFFUSER_ARGS = {
m['path']: {k: v for k, v in m.items() if k != 'path' and k != 'pipeline'}
m['path']: {
k: v for k, v in m.items()
if k not in ('path', 'pipeline', 'generate_args', 'enable_cpu_offload')
} for m in VALIDATOR_MODEL_META['diffusers']
}

GENERATE_ARGS = {
Review comment (PR author): New field for generation arguments, which FLUX-1.dev can utilize.

m['path']: m['generate_args']
for m in VALIDATOR_MODEL_META['diffusers']
if 'generate_args' in m
}

DIFFUSER_CPU_OFFLOAD_ENABLED = {
m['path']: m.get('enable_cpu_offload', False)
for m in VALIDATOR_MODEL_META['diffusers']
}

@@ -79,4 +112,7 @@

DIFFUSER_NAMES = list(DIFFUSER_ARGS.keys())

IMAGE_ANNOTATION_MODEL = "Salesforce/blip2-opt-2.7b-coco"
IMAGE_ANNOTATION_MODEL = "Salesforce/blip2-opt-6.7b-coco"

TEXT_MODERATION_MODEL = "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit"
# "meta-llama/Meta-Llama-3.1-8B-Instruct"
Empty file removed bitmind/miner/__init__.py
Empty file.
13 changes: 6 additions & 7 deletions bitmind/protocol.py
@@ -26,12 +26,6 @@
import base64
import torch

def b64_encode(image):
if isinstance(image, torch.Tensor):
image = transforms.ToPILImage()(image.cpu().detach())
image_bytes = BytesIO()
image.save(image_bytes, format="JPEG")
return base64.b64encode(image_bytes.getvalue())

def prepare_image_synapse(image: Image):
"""
@@ -43,7 +37,12 @@ def prepare_image_synapse(image: Image):
Returns:
ImageSynapse: An instance of ImageSynapse containing the encoded image and a default prediction value.
"""
b64_encoded_image = b64_encode(image)
if isinstance(image, torch.Tensor):
image = transforms.ToPILImage()(image.cpu().detach())

image_bytes = BytesIO()
image.save(image_bytes, format="JPEG")
b64_encoded_image = base64.b64encode(image_bytes.getvalue())
return ImageSynapse(image=b64_encoded_image)


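For context, the receiving side would invert this encoding roughly as follows. decode_synapse_image is an illustrative name; the miner-side helper is not part of this diff:

import base64
from io import BytesIO
from PIL import Image

def decode_synapse_image(b64_encoded_image: bytes) -> Image.Image:
    """Recover the PIL image from the base64 JPEG payload built by prepare_image_synapse."""
    return Image.open(BytesIO(base64.b64decode(b64_encoded_image)))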
76 changes: 0 additions & 76 deletions bitmind/synthetic_image_generation/combine_datasets.py

This file was deleted.

88 changes: 77 additions & 11 deletions bitmind/synthetic_image_generation/image_annotation_generator.py
@@ -1,5 +1,5 @@
# Transformer models
from transformers import AutoProcessor, Blip2ForConditionalGeneration
from transformers import Blip2Processor, Blip2ForConditionalGeneration, pipeline

# Logging and progress handling
from transformers import logging as transformers_logging
@@ -14,28 +14,89 @@

from bitmind.image_dataset import ImageDataset
from bitmind.synthetic_image_generation.utils import image_utils
from bitmind.constants import HUGGINGFACE_CACHE_DIR

disable_progress_bar()

class ImageAnnotationGenerator:
def __init__(self, model_name: str, device: str = 'auto'):
self.device = torch.device('cuda' if torch.cuda.is_available() and device == 'auto' else 'cpu')
def __init__(
self, model_name: str, text_moderation_model_name: str, device: str = 'auto',
apply_moderation: bool = True
):
self.device = torch.device(
'cuda' if torch.cuda.is_available() and device == 'auto' else 'cpu'
)

self.model_name = model_name
self.processor = AutoProcessor.from_pretrained(self.model_name)
self.processor = Blip2Processor.from_pretrained(
self.model_name, cache_dir=HUGGINGFACE_CACHE_DIR
)
self.model = None
self.load_model()

def load_model(self):
self.model = Blip2ForConditionalGeneration.from_pretrained(self.model_name, torch_dtype=torch.float16)

self.apply_moderation = apply_moderation
self.text_moderation_model_name = text_moderation_model_name
self.text_moderation_pipeline = None

def load_models(self):
bt.logging.info(f"Loading image annotation model {self.model_name}")
self.model = Blip2ForConditionalGeneration.from_pretrained(
self.model_name,
torch_dtype=torch.float16,
cache_dir=HUGGINGFACE_CACHE_DIR
)
self.model.to(self.device)
bt.logging.info(f"Loaded image annotation model {self.model_name}")
bt.logging.info(f"Loading annotation moderation model {self.text_moderation_model_name}...")
if self.apply_moderation:
self.text_moderation_pipeline = pipeline(
"text-generation",
model=self.text_moderation_model_name,
model_kwargs={"torch_dtype": torch.bfloat16},
device_map="auto"
)
bt.logging.info(f"Loaded annotation moderation model {self.text_moderation_model_name}.")

def clear_gpu(self):
bt.logging.debug(f"Clearing GPU memory after generating image annotation")
self.model.to('cpu')
del self.model
if self.text_moderation_pipeline:
self.text_moderation_pipeline = None
gc.collect()
torch.cuda.empty_cache()

def moderate_description(self, description: str, max_new_tokens: int = 80) -> str:
"""
Uses the text moderation pipeline to make the description more concise and neutral.
"""
messages = [
Review comment (PR author): [INST] and [/INST] tokens are used here to signify the beginning and end of instructions. Without them, the model repeats the prompt in its answer.

{
"role": "system",
"content": ("[INST]You always concisely rephrase given descriptions, eliminate redundancy, "
"and remove all specific references to individuals by name. You do not respond with"
"anything other than the revised description.[/INST]")
},
{
"role": "user",
"content": description
}
]
try:
moderated_text = self.text_moderation_pipeline(messages, max_new_tokens=max_new_tokens,
pad_token_id=self.text_moderation_pipeline.tokenizer.eos_token_id,
return_full_text=False)

if isinstance(moderated_text, list):
    return moderated_text[0]['generated_text']
else:
    bt.logging.error("Moderated text did not return a list.")

return description # Fallback to the original description if no suitable entry is found
except Exception as e:
bt.logging.error(f"An error occurred during moderation: {e}", exc_info=True)
return description # Return the original description as a fallback

def generate_description(self,
image: PIL.Image.Image,
verbose: bool = False,
@@ -56,8 +117,7 @@ def generate_description(self,
transformers_logging.set_verbosity_error()

description = ""
prompts = ["A picture of", "The setting is", "The background is", "The image type/style is"]

prompts = ["An image of", "The setting is", "The background is", "The image type/style is"]
Review comment (PR author): I replaced "A picture of" with "An image of" because I noticed qualitative improvements in my experiments. This makes sense because "picture" is more constrained than "image" in my opinion. Could use further testing.

for i, prompt in enumerate(prompts):
description += prompt + ' '
inputs = self.processor(image, text=description, return_tensors="pt").to(self.device, torch.float16)
@@ -80,11 +140,17 @@

if not verbose:
transformers_logging.set_verbosity_info()


if description.startswith(prompts[0]):
Review comment (PR author): Check for the moderation model repeating the prompt in its response.

description = description[len(prompts[0]):]

# Remove any trailing spaces and ensure the description ends with a period
description = description.strip()
if not description.endswith('.'):
description += '.'
if self.apply_moderation:
moderated_description = self.moderate_description(description)
return moderated_description
return description

def generate_annotation(
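Taken together, the annotation flow introduced by this PR could be exercised roughly as follows. This is a hedged usage sketch: the constants come from bitmind/constants.py as changed above, and the example image path is hypothetical:

from PIL import Image

from bitmind.constants import IMAGE_ANNOTATION_MODEL, TEXT_MODERATION_MODEL
from bitmind.synthetic_image_generation.image_annotation_generator import ImageAnnotationGenerator

# BLIP2 drafts a caption; the Llama pipeline then rewrites it to be concise,
# neutral, and free of personal names before it is used as a diffusion prompt.
generator = ImageAnnotationGenerator(
    model_name=IMAGE_ANNOTATION_MODEL,                 # Salesforce/blip2-opt-6.7b-coco
    text_moderation_model_name=TEXT_MODERATION_MODEL,  # unsloth 4-bit Llama-3.1-8B-Instruct
    apply_moderation=True,
)
generator.load_models()

image = Image.open("example.jpg")  # hypothetical input image
description = generator.generate_description(image)

generator.clear_gpu()  # release both models once annotation is done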