diff --git a/models/turbine_models/custom_models/pipeline_base.py b/models/turbine_models/custom_models/pipeline_base.py index 837b95e6..33bc425b 100644 --- a/models/turbine_models/custom_models/pipeline_base.py +++ b/models/turbine_models/custom_models/pipeline_base.py @@ -368,6 +368,8 @@ def __init__( target, dict ), "Device and target triple must be both dicts or both strings." for submodel in self.map.keys(): + if self.map[submodel].get("load") == False: + continue assert submodel in device.keys(), f"Device for {submodel} not found." assert ( submodel in target.keys() diff --git a/models/turbine_models/custom_models/sd_inference/sd_pipeline.py b/models/turbine_models/custom_models/sd_inference/sd_pipeline.py index 4cd1e4b8..61bb66d8 100644 --- a/models/turbine_models/custom_models/sd_inference/sd_pipeline.py +++ b/models/turbine_models/custom_models/sd_inference/sd_pipeline.py @@ -120,6 +120,8 @@ "decomp_attn": None, }, }, +} +sdxl_compiled_pipeline_map = { "unetloop": { "module_name": "sdxl_compiled_pipeline", "load": False, @@ -434,7 +436,7 @@ def load_scheduler( if self.is_sd3: export_fn = sd3_schedulers.export_scheduler_model else: - export_fn = scheduler.export_scheduler_model + export_fn = schedulers.export_scheduler_model self.map["scheduler"] = { "module_name": "compiled_scheduler", "export_fn": export_fn, diff --git a/models/turbine_models/custom_models/sd_inference/utils.py b/models/turbine_models/custom_models/sd_inference/utils.py index d0235d1f..aee316cd 100644 --- a/models/turbine_models/custom_models/sd_inference/utils.py +++ b/models/turbine_models/custom_models/sd_inference/utils.py @@ -476,7 +476,7 @@ def get_mfma_spec_path(target_chip, save_dir, masked_attention=False, use_punet= url = "https://raw.githubusercontent.com/nod-ai/sdxl-scripts/main/int8-model/specs/attention_and_matmul_spec.mlir" elif not masked_attention: suffix = "" - url = "https://sharkpublic.blob.core.windows.net/sharkpublic/specs/no_pad/attention_and_matmul_spec_mfma.mlir" + url = "https://raw.githubusercontent.com/iree-org/iree/refs/heads/main/build_tools/pkgci/external_test_suite/attention_and_matmul_spec.mlir" else: suffix = "_pad" url = "https://sharkpublic.blob.core.windows.net/sharkpublic/specs/latest/attention_and_matmul_spec_gfx942.mlir" diff --git a/models/turbine_models/custom_models/sd_inference/vae.py b/models/turbine_models/custom_models/sd_inference/vae.py index 9d5c0c6f..732ccc5f 100644 --- a/models/turbine_models/custom_models/sd_inference/vae.py +++ b/models/turbine_models/custom_models/sd_inference/vae.py @@ -171,7 +171,6 @@ def export_vae_model( vae_model, external_weights, external_weight_path, - vae_harness=vae_harness, ) if weights_only: return external_weight_path diff --git a/models/turbine_models/custom_models/sdxl_inference/unet.py b/models/turbine_models/custom_models/sdxl_inference/unet.py index 73b32cf5..2d96f2e6 100644 --- a/models/turbine_models/custom_models/sdxl_inference/unet.py +++ b/models/turbine_models/custom_models/sdxl_inference/unet.py @@ -205,10 +205,6 @@ def export_unet_model( if not attn_spec: if (not decomp_attn) and use_punet: attn_spec = "punet" - elif (not decomp_attn) and "gfx9" in target: - attn_spec = "mfma" - elif (not decomp_attn) and "gfx11" in target: - attn_spec = "wmma" safe_name = utils.create_safe_name( hf_model_name, f"_bs{batch_size}_{max_length}_{height}x{width}_{precision}_{submodel_name}",