Skip to content

Commit

Permalink
fp8/pipeline: add some profiler annotation for prepare/denoise/vae
Browse files (browse the repository at this point in the history)
  • Loading branch information
yorickvP committed Oct 10, 2024
1 parent f676e86 commit 0039a42
Showing 1 changed file with 23 additions and 20 deletions.
43 changes: 23 additions & 20 deletions fp8/flux_pipeline.py
Original file line number | Diff line number | Diff line change
@@ -615,15 +615,16 @@ def generate(
 )

 # prepare inputs
-img, img_ids, vec, txt, txt_ids = map(
-lambda x: x, # x.contiguous(),
-self.prepare(
-img=img,
-prompt=prompt,
-target_device=self.device_flux,
-target_dtype=self.dtype,
-),
-)
+with torch.profiler.record_function("prepare"):
+img, img_ids, vec, txt, txt_ids = map(
+lambda x: x, # x.contiguous(),
+self.prepare(
+img=img,
+prompt=prompt,
+target_device=self.device_flux,
+target_dtype=self.dtype,
+),
+)

 # dispatch to gpu if offloaded
 if self.offload_flow:
@@ -634,16 +635,17 @@ def generate(
 output_imgs = []

 for i in range(batch_size):
-denoised_img = self.denoise_single_item(
-img[i],
-img_ids[i],
-txt[i],
-txt_ids[i],
-vec[i],
-timesteps,
-guidance,
-compiling
-)
+with torch.profiler.record_function("denoise-single-item"):
+denoised_img = self.denoise_single_item(
+img[i],
+img_ids[i],
+txt[i],
+txt_ids[i],
+vec[i],
+timesteps,
+guidance,
+compiling
+)
 output_imgs.append(denoised_img)
 compiling = False

@@ -655,7 +657,8 @@ def generate(
 torch.cuda.empty_cache()

 # decode latents to pixel space
-img = self.vae_decode(img, height, width)
+with torch.profiler.record_function("vae-decode"):
+img = self.vae_decode(img, height, width)

 return self.as_img_tensor(img)

Expand Down

0 comments on commit 0039a42

Please sign in to comment.