When I fine-tune the SD model and set Trainer(precision=16), an error occurs #25

Open
zyx1213271098 opened this issue Oct 18, 2022 · 0 comments

Traceback (most recent call last):
File "main.py", line 851, in
trainer.fit(model, data)
File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 553, in fit
self._run(model)
File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 918, in _run
self._dispatch()
File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 986, in _dispatch
self.accelerator.start_training(self)
File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/accelerators/accelerator.py", line 92, in start_training
self.training_type_plugin.start_training(trainer)
File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py", line 161, in start_training
self._results = trainer.run_stage()
File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 996, in run_stage
return self._run_train()
File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/trainer/trainer.py", line 1045, in _run_train
self.fit_loop.run()
File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/loops/base.py", line 111, in run
self.advance(*args, **kwargs)
File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/loops/fit_loop.py", line 200, in advance
epoch_output = self.epoch_loop.run(train_dataloader)
File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/loops/base.py", line 111, in run
self.advance(*args, **kwargs)
File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/loops/epoch/training_epoch_loop.py", line 130, in advance
batch_output = self.batch_loop.run(batch, self.iteration_count, self._dataloader_idx)
File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 101, in run
super().run(batch, batch_idx, dataloader_idx)
File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/loops/base.py", line 111, in run
self.advance(*args, **kwargs)
File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 148, in advance
result = self._run_optimization(batch_idx, split_batch, opt_idx, optimizer)
File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 194, in _run_optimization
closure()
File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 236, in _training_step_and_backward_closure
result = self.training_step_and_backward(split_batch, batch_idx, opt_idx, optimizer, hiddens)
File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 549, in training_step_and_backward
self.backward(result, optimizer, opt_idx)
File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py", line 590, in backward
result.closure_loss = self.trainer.accelerator.backward(result.closure_loss, optimizer, *args, **kwargs)
File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/accelerators/accelerator.py", line 276, in backward
self.precision_plugin.backward(self.lightning_module, closure_loss, *args, **kwargs)
File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/plugins/precision/precision_plugin.py", line 78, in backward
model.backward(closure_loss, optimizer, *args, **kwargs)
File "/opt/conda/envs/ldm/lib/python3.8/site-packages/pytorch_lightning/core/lightning.py", line 1481, in backward
loss.backward(*args, **kwargs)
File "/opt/conda/envs/ldm/lib/python3.8/site-packages/torch/_tensor.py", line 363, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
File "/opt/conda/envs/ldm/lib/python3.8/site-packages/torch/autograd/init.py", line 173, in backward
Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass
File "/opt/conda/envs/ldm/lib/python3.8/site-packages/torch/autograd/function.py", line 253, in apply
return user_fn(self, *args)
File "/root/data/juicefs_hz_cv_v3/11120102/project/generative-model/pesser-stable-diffusion/ldm/modules/diffusionmodules/util.py", line 138, in backward
output_tensors = ctx.run_function(*shallow_copies)
File "/root/data/juicefs_hz_cv_v3/11120102/project/generative-model/pesser-stable-diffusion/ldm/modules/attention.py", line 215, in _forward
x = self.attn1(self.norm1(x), context=context if self.disable_self_attn else None) + x
File "/opt/conda/envs/ldm/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1110, in _call_impl
return forward_call(*input, **kwargs)
File "/opt/conda/envs/ldm/lib/python3.8/site-packages/torch/nn/modules/normalization.py", line 189, in forward
return F.layer_norm(
File "/opt/conda/envs/ldm/lib/python3.8/site-packages/torch/nn/functional.py", line 2486, in layer_norm
return torch.layer_norm(input, normalized_shape, weight, bias, eps, torch.backends.cudnn.enabled)
RuntimeError: expected scalar type Half but found Float

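The traceback points at the custom gradient-checkpointing function (`CheckpointFunction` in ldm/modules/diffusionmodules/util.py, line 138): during the backward pass it re-runs the attention block's forward, but that recomputation happens outside the autocast context that Lightning's `precision=16` used for the original forward. The half-precision activations then reach LayerNorm's full-precision weights, which matches the "expected scalar type Half but found Float" error above. The sketch below shows one way to keep the two passes consistent, by recording the autocast state in `forward()` and restoring it in `backward()`. It is a simplified illustration of that file's structure, not a patch tested against this repository.

```python
import torch


class CheckpointFunction(torch.autograd.Function):
    """Simplified gradient checkpointing that replays the forward pass
    under the same autocast state it originally ran in.
    Used as CheckpointFunction.apply(fn, len(inputs), *inputs, *params)."""

    @staticmethod
    def forward(ctx, run_function, length, *args):
        ctx.run_function = run_function
        ctx.input_tensors = list(args[:length])
        ctx.input_params = list(args[length:])
        # Remember whether autocast (precision=16) was active so the
        # recomputation in backward() produces matching dtypes.
        ctx.autocast_enabled = torch.is_autocast_enabled()
        with torch.no_grad():
            output_tensors = ctx.run_function(*ctx.input_tensors)
        return output_tensors

    @staticmethod
    def backward(ctx, *output_grads):
        ctx.input_tensors = [x.detach().requires_grad_(True) for x in ctx.input_tensors]
        # Re-run the forward under the recorded autocast state; without this,
        # fp16 activations hit fp32 LayerNorm weights and layer_norm raises
        # "expected scalar type Half but found Float".
        with torch.enable_grad(), torch.cuda.amp.autocast(enabled=ctx.autocast_enabled):
            shallow_copies = [x.view_as(x) for x in ctx.input_tensors]
            output_tensors = ctx.run_function(*shallow_copies)
        input_grads = torch.autograd.grad(
            output_tensors,
            ctx.input_tensors + ctx.input_params,
            output_grads,
            allow_unused=True,
        )
        return (None, None) + input_grads
```

If editing util.py is not an option, disabling gradient checkpointing (e.g. `use_checkpoint: False` in the UNet config, if the model config exposes it) or training with `precision=32` should also avoid the dtype mismatch, at the cost of memory or speed.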