Commit de0febb: Merge remote-tracking branch 'fork/main'
ErwannMillon committed Jul 3, 2024
2 parents 06ee4db + 0a3cbf6
Showing 6 changed files with 2,580 additions and 724 deletions.
216 changes: 216 additions & 0 deletions inpaint_test.py
@@ -0,0 +1,216 @@
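# Outpainting test script: expand an image canvas, fill the new region with a
# ControlNet inpaint pipeline, then refine the seam with a masked img2img pass.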
# !pip install transformers accelerate
import os

import numpy as np
import torch

from diffusers import (
    ControlNetModel,
    DDIMScheduler,
    DPMSolverMultistepScheduler,
    EulerAncestralDiscreteScheduler,
    StableDiffusionControlNetInpaintPipeline,
    StableDiffusionImg2ImgPipeline,
)
from diffusers.utils import load_image
from mask_utils import create_gradient, expand_image  # avoid re-importing load_image
from masked2 import StableDiffusionMaskedImg2ImgPipeline


init_image = load_image(
"https://huggingface.co/datasets/diffusers/test-arrays/resolve/main/stable_diffusion_inpaint/boy.png"
)
init_image = init_image.resize((512, 512))

# generator = torch.Generator(device="cpu").manual_seed(1)

mask_image = load_image(
"https://huggingface.co/datasets/diffusers/test-arrays/resolve/main/stable_diffusion_inpaint/boy_mask.png"
)
mask_image = mask_image.resize((512, 512))


img_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo.png"
mask_url = "https://raw.githubusercontent.com/CompVis/latent-diffusion/main/data/inpainting_examples/overture-creations-5sI6fQgYIuo_mask.png"

init_image = load_image(img_url)
mask_image = load_image(mask_url)


img_path = "/home/erwann/diffusers/examples/community/new_image.png"
# mask_path = "/home/erwann/diffusers/examples/community/hard_mask_5.png"
mask_path = "/home/erwann/diffusers/examples/community/mask_image.png"
init_image = load_image(img_path)
mask_image = load_image(mask_path)
# mask_image.save("mask.png")


# new_width = 480
# new_height = new_width * init_image.height / init_image.width
# new_height = 640
# init_image = init_image.resize((new_width, int(new_height)))

# mask_image = mask_image.resize(init_image.size)
# mask_image = mask_image.resize((512, 512))

def make_inpaint_condition(image, image_mask):
    """Build the ControlNet inpaint conditioning image: masked pixels are set to -1."""
    image = np.array(image.convert("RGB")).astype(np.float32) / 255.0
    image_mask = np.array(image_mask.convert("L")).astype(np.float32) / 255.0

    # Compare height and width (shape[0:2]), not just height.
    assert image.shape[0:2] == image_mask.shape[0:2], "image and image_mask must have the same image size"
    image[image_mask > 0.001] = -1.0  # mark masked pixels
    image = np.expand_dims(image, 0).transpose(0, 3, 1, 2)
    image = torch.from_numpy(image)
    return image


control_image = make_inpaint_condition(init_image, mask_image)

controlnet = ControlNetModel.from_pretrained(
    "lllyasviel/control_v11p_sd15_inpaint", torch_dtype=torch.float16
)

# pipe = StableDiffusionControlNetInpaintPipeline.from_pretrained(
# "/home/erwann/diffusers/examples/community/realistic_vision", controlnet=controlnet, torch_dtype=torch.float16
# )
from custom_inpaint_pipeline import StableDiffusionMaskedLatentControlNetInpaintPipeline


pipe = StableDiffusionControlNetInpaintPipeline.from_pretrained(
    "/home/erwann/diffusers/examples/community/realistic_vision", controlnet=controlnet, torch_dtype=torch.float16
)

# pipe = StableDiffusionMaskedLatentControlNetInpaintPipeline.from_pretrained(
#     "/home/erwann/diffusers/examples/community/realistic_vision", controlnet=controlnet, torch_dtype=torch.float16
# )
pipe = StableDiffusionMaskedLatentControlNetInpaintPipeline(
    pipe.vae, pipe.text_encoder, pipe.tokenizer, pipe.unet, pipe.controlnet, pipe.scheduler, None, None,
)
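# The two trailing None arguments stand in for safety_checker and
# feature_extractor, assuming the custom pipeline keeps the standard
# Stable Diffusion ControlNet inpaint component order.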


# pipe = StableDiffusionControlNetInpaintPipeline.from_pretrained(
# "/home/erwann/diffusers/examples/community/deliberate", controlnet=controlnet, torch_dtype=torch.float16
# )
# pipe = StableDiffusionControlNetInpaintPipeline.from_pretrained(
# "/home/erwann/generation-service/safetensor-models/sd1.5", controlnet=controlnet, torch_dtype=torch.float16
# )
# generator = None
# speed up diffusion process with faster scheduler and memory optimization
# pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)

# The Euler Ancestral scheduler was immediately overridden below; the DPM++
# multistep (Karras) scheduler is the one actually used.
# pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)

pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config, use_karras_sigmas=True)




# init_image = load_image("/home/erwann/diffusers/examples/community/castle.png")
init_image = load_image("/home/erwann/diffusers/examples/community/bmw.png")
init_image = init_image.resize((512, 512))


extended_image, mask_image = expand_image(init_image, expand_x=0, expand_y=-256)
print("Image size after extending, " + str(extended_image.size))
control_image = make_inpaint_condition(extended_image, mask_image)
# Soften the hard outpaint mask with a gradient so the seam blends smoothly.
blend_mask = create_gradient(mask_image, x=None, y=-256, offset=200)

extended_image.save("extended_image.png")
mask_image.save("mask_image.png")
blend_mask.save("blend_mask.png")
# vae = AutoencoderKL.from_pretrained("stabilityai/sd-vae-ft-mse").half()
# pipe.vae = vae

pipe.enable_model_cpu_offload()
pipe.enable_xformers_memory_efficient_attention()

generator = None
# generator = torch.Generator().manual_seed(456)
generator = torch.Generator().manual_seed(123)
# generate image
pipe.safety_checker = None
prompt= "bmw drifting, pink smoke"
images = pipe(
prompt,
num_inference_steps=25,
generator=generator,
guidance_scale=6.0,
negative_prompt="deformed iris, deformed pupils, worst quality, low quality, jpeg artifacts, ugly, duplicate, morbid, mutilated, extra fingers, mutated hands, poorly drawn hands, poorly drawn face, mutation, deformed, blurry, bad anatomy, bad proportions, extra limbs, cloned face, disfigured, gross proportions, malformed limbs, missing arms, missing legs, extra arms, extra legs, fused fingers, too many fingers, long neck",
# eta=1.0,
# eta=1.0,
# soft_mask=blend_mask,
width=extended_image.width,
height=extended_image.height,
image=extended_image,
mask_image=blend_mask,
control_image=control_image,
num_images_per_prompt=4,
controlnet_conditioning_scale=1.,
guess_mode=True,
).images


folder = "_".join(prompt.split(" "))
folder = "no_prompt" if len(folder) == 0 else folder
os.makedirs(folder, exist_ok=True)
print("Saving to ", folder)

for i, image in enumerate(images):
    image.save(os.path.join(folder, f"2_extend_{i}.png"))


# best config: strength 0.35 / 20 steps

# img2img_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("/home/erwann/generation-service/safetensor-models/real", safety_checker=None)
# img2img_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("/home/erwann/generation-service/safetensor-models/realistic_vision", safety_checker=None)
img2img_pipe = StableDiffusionMaskedImg2ImgPipeline.from_pretrained("/home/erwann/generation-service/safetensor-models/realistic_vision", safety_checker=None)
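# StableDiffusionMaskedImg2ImgPipeline (imported from masked2 above) accepts a
# mask_image for latent blending, unlike the stock img2img pipeline.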

print("Scheduler")
print(img2img_pipe.scheduler)


# img2imgpipe = StableDiffusionImg2ImgPipeline(
# vae=pipe.vae,
# text_encoder=pipe.text_encoder,
# tokenizer=pipe.tokenizer,
# unet=pipe.unet,
# scheduler=pipe.scheduler,
# safety_checker=None,
# feature_extractor=pipe.feature_extractor,
# )


img2img_pipe = img2img_pipe.to("cuda")
img2img_pipe.enable_attention_slicing()
img2img_pipe.enable_xformers_memory_efficient_attention()

# soft_mask_pil = Image.open("/home/erwann/diffusers/examples/community/soft_mask_5.png")

# img2img_pipe = StableDiffusionImg2ImgPipeline.from_pretrained("/home/erwann/generation-service/safetensor-models/real", safety_checker=None)
from PIL import Image


# Second pass: a low-strength masked img2img run smooths the transition
# between the original and outpainted regions, blending latents with blend_mask.
for i, image in enumerate(images):
    final_image = img2img_pipe(
        prompt,
        image=image,
        mask_image=blend_mask,
        strength=0.350,
        num_inference_steps=19,
        generator=generator,
    ).images[0]
    final_image.save(os.path.join(folder, f"img2img_{i}_real_cfg8_9.png"))
# plt.imshow(final_image)
# plt.show()

# import matplotlib.pyplot as plt
# from PIL import Image



# plt.imshow(image)
# plt.show()
110 changes: 110 additions & 0 deletions mask_utils.py
@@ -0,0 +1,110 @@
import numpy as np
import torch

from diffusers.utils import load_image
from PIL import Image, ImageDraw, ImageFilter


def make_inpaint_condition(image, image_mask):
    """Build the ControlNet inpaint conditioning image: masked pixels are set to -1."""
    image = np.array(image.convert("RGB")).astype(np.float32) / 255.0
    image_mask = np.array(image_mask.convert("L")).astype(np.float32) / 255.0

    # Compare height and width (shape[0:2]), not just height.
    assert image.shape[0:2] == image_mask.shape[0:2], "image and image_mask must have the same image size"
    image[image_mask > 0.001] = -1.0  # mark masked pixels
    image = np.expand_dims(image, 0).transpose(0, 3, 1, 2)
    image = torch.from_numpy(image)
    return image
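# For a 512x512 RGB input this yields a (1, 3, 512, 512) float tensor with
# masked pixels set to -1, the conditioning format the inpaint ControlNet uses.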


def create_gradient(image, y=None, x=None, offset=40):
    """
    Takes a binary mask (white = area to be inpainted, black = area kept from
    the original image) and draws a white-to-black gradient at the mask border,
    extending into the original black area. This avoids a hard edge, giving a
    smooth transition from the inpainted area to the original image.
    Used to blend latents together in MaskedImg2ImgPipeline.
    """
    if y is None and x is None:
        raise ValueError("Either y or x must be specified")
    draw = ImageDraw.Draw(image)
    if y is not None and x is not None:  # `if y and x` would miss 0 values
        raise ValueError("Only one of y or x may be specified (for now)")

    sign = 1
    if offset < 0:
        sign = -1

    offset = abs(offset)
    if y is not None:
        if y > 0:
            y = image.height - y
            if offset > 0:
                sign = -1
        else:
            y = abs(y)
        for i in range(abs(offset)):
            color = abs(255 - int(255 * (i / abs(offset))))  # grayscale value for this line
            i *= sign
            draw.line([(0, y + i), (image.width, y + i)], fill=(color, color, color))
    if x is not None:
        if x > 0:
            x = image.width - x
            if offset > 0:
                sign = -1
        else:
            x = abs(x)
        for i in range(abs(offset)):
            color = abs(255 - int(255 * (i / abs(offset))))  # grayscale value for this line
            i *= sign
            draw.line([(x + i, 0), (x + i, image.height)], fill=(color, color, color))
    return image
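# Example (mirroring inpaint_test.py): fade the seam of a mask produced by
# expanding an image 256 px at the top, over a 200 px band:
#   blend_mask = create_gradient(mask_image, y=-256, offset=200)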

# def soften_mask(mask_before_blur, mask_img, blur_radius):
# # Apply Gaussian Blur to the mask
# blurred_mask = mask_img.filter(ImageFilter.GaussianBlur(blur_radius))
# mask_before_blur = mask_before_blur.convert("L")

# blurred_mask.paste(mask_before_blur, mask=mask_before_blur)

# return blurred_mask

def expand_image(img, expand_y=0, expand_x=0):
    """Expand the canvas by |expand_x| / |expand_y| pixels (the sign picks the
    side) and return the expanded image plus a mask that is white over the new area."""
    # Load the image
    img = load_image(img)
    width, height = img.size

    # Create a new image with the expanded dimensions
    new_height = height + abs(expand_y)
    new_width = width + abs(expand_x)

    new_img = Image.new('RGB', (new_width, new_height), color='white')

    # Create a mask image
    mask_img = Image.new('1', (new_width, new_height), color='white')

    # If expand_y is positive, the image is expanded at the bottom;
    # if negative, at the top. Same for expand_x (right / left).
    y_position = 0 if expand_y > 0 else abs(expand_y)
    x_position = 0 if expand_x > 0 else abs(expand_x)
    new_img.paste(img, (x_position, y_position))

    # Black out the original-image area in the mask
    mask_img.paste(Image.new('1', img.size, color='black'), (x_position, y_position))
    mask_img = mask_img.convert("RGB")

    # soft_mask_img = soften_mask(mask_img, mask_img, 50)
    # return new_img, mask_img, soft_mask_img

    return new_img, mask_img

if __name__ == '__main__':
    # Usage:
    path = "/home/erwann/diffusers/examples/community/castle.png"
    expand = 256
    new_img, mask_img = expand_image(path, expand_x=expand)
    new_img.save('new_image.png')
    mask_img.save('mask_image.png')
    # soft_mask.save('soft_mask.png')
    softened_mask = create_gradient(mask_img, x=expand, offset=40)
    softened_mask.save('soft_mask.png')
