Fine_tune_RT_DETR_on_a_custom_dataset: #465

Open
msaqib17 opened this issue Oct 1, 2024 · 0 comments

msaqib17 commented Oct 1, 2024


RuntimeError Traceback (most recent call last)
Cell In[17], line 13
1 from transformers import Trainer
3 trainer = Trainer(
4 model=model,
5 args=training_args,
(...)
10 compute_metrics=eval_compute_metrics_fn,
11 )
---> 13 trainer.train()

File ~/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/transformers/trainer.py:2052, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
2050 hf_hub_utils.enable_progress_bars()
2051 else:
-> 2052 return inner_training_loop(
2053 args=args,
2054 resume_from_checkpoint=resume_from_checkpoint,
2055 trial=trial,
2056 ignore_keys_for_eval=ignore_keys_for_eval,
2057 )

File ~/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/transformers/trainer.py:2388, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
2385 self.control = self.callback_handler.on_step_begin(args, self.state, self.control)
2387 with self.accelerator.accumulate(model):
-> 2388 tr_loss_step = self.training_step(model, inputs)
2390 if (
2391 args.logging_nan_inf_filter
2392 and not is_torch_xla_available()
2393 and (torch.isnan(tr_loss_step) or torch.isinf(tr_loss_step))
2394 ):
2395 # if loss is nan or inf simply add the average of previous logged losses
2396 tr_loss = tr_loss + tr_loss / (1 + self.state.global_step - self._globalstep_last_logged)

File ~/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/transformers/trainer.py:3485, in Trainer.training_step(self, model, inputs)
3482 return loss_mb.reduce_mean().detach().to(self.args.device)
3484 with self.compute_loss_context_manager():
-> 3485 loss = self.compute_loss(model, inputs)
3487 del inputs
3488 if (
3489 self.args.torch_empty_cache_steps is not None
3490 and self.state.global_step % self.args.torch_empty_cache_steps == 0
3491 ):

File ~/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/transformers/trainer.py:3532, in Trainer.compute_loss(self, model, inputs, return_outputs)
3530 else:
3531 labels = None
-> 3532 outputs = model(**inputs)
3533 # Save past state if it exists
3534 # TODO: this needs to be fixed and made cleaner later.
3535 if self.args.past_index >= 0:

File ~/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/torch/nn/modules/module.py:1553, in Module._wrapped_call_impl(self, *args, **kwargs)
1551 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1552 else:
-> 1553 return self._call_impl(*args, **kwargs)

File ~/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/torch/nn/modules/module.py:1562, in Module._call_impl(self, *args, **kwargs)
1557 # If we don't have any hooks, we want to skip the rest of the logic in
1558 # this function, and just call forward.
1559 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1560 or _global_backward_pre_hooks or _global_backward_hooks
1561 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1562 return forward_call(*args, **kwargs)
1564 try:
1565 result = None

File ~/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/torch/nn/parallel/data_parallel.py:186, in DataParallel.forward(self, *inputs, **kwargs)
184 return self.module(*inputs[0], **module_kwargs[0])
185 replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
--> 186 outputs = self.parallel_apply(replicas, inputs, module_kwargs)
187 return self.gather(outputs, self.output_device)

File ~/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/torch/nn/parallel/data_parallel.py:201, in DataParallel.parallel_apply(self, replicas, inputs, kwargs)
200 def parallel_apply(self, replicas: Sequence[T], inputs: Sequence[Any], kwargs: Any) -> List[Any]:
--> 201 return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])

File ~/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/torch/nn/parallel/parallel_apply.py:109, in parallel_apply(modules, inputs, kwargs_tup, devices)
107 output = results[i]
108 if isinstance(output, ExceptionWrapper):
--> 109 output.reraise()
110 outputs.append(output)
111 return outputs

File ~/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/torch/_utils.py:706, in ExceptionWrapper.reraise(self)
702 except TypeError:
703 # If the exception takes multiple arguments, don't try to
704 # instantiate since we don't know how to
705 raise RuntimeError(msg) from None
--> 706 raise exception

RuntimeError: Caught RuntimeError in replica 0 on device 0.
Original Traceback (most recent call last):
File "/home/xyz/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/torch/nn/parallel/parallel_apply.py", line 84, in _worker
output = module(*input, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/xyz/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/xyz/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/xyz/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/transformers/models/rt_detr/modeling_rt_detr.py", line 2659, in forward
outputs = self.model(
^^^^^^^^^^^
File "/home/xyz/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/xyz/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/xyz/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/transformers/models/rt_detr/modeling_rt_detr.py", line 1892, in forward
reference_points_unact = torch.concat([denoising_bbox_unact, reference_points_unact], 1)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 16 but got size 8 for tensor number 1 in the list.
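
Note on the traceback: the failure occurs inside torch.nn.DataParallel (replica 0), which Trainer falls back to automatically when more than one GPU is visible and the script was not launched in distributed mode. A plausible reading of the size mismatch is that the denoising boxes (built from the labels) still carry the full batch of 16 samples, while the image-derived reference points on each replica only carry the 8 samples scattered to that GPU, so the concat at modeling_rt_detr.py line 1892 sees inconsistent batch sizes. A minimal sketch of a workaround, assuming that interpretation, is to make only one GPU visible before anything touches CUDA, so Trainer never wraps the model in DataParallel:

import os

# Assumption: the mismatch comes from DataParallel splitting the batch across
# two GPUs. Exposing a single GPU keeps Trainer on one device; this must be
# set before torch initialises CUDA in the session.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import torch

print(torch.cuda.device_count())  # should now report 1

For genuine multi-GPU training, launching the same script with torchrun or accelerate launch (one process per GPU, DistributedDataParallel) avoids DataParallel's scatter step altogether.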
