Fine_tune_RT_DETR_on_a_custom_dataset: #465

Open
msaqib17 opened this issue Oct 1, 2024 · 0 comments

msaqib17 commented Oct 1, 2024


RuntimeError Traceback (most recent call last)
Cell In[17], line 13
1 from transformers import Trainer
3 trainer = Trainer(
4 model=model,
5 args=training_args,
(...)
10 compute_metrics=eval_compute_metrics_fn,
11 )
---> 13 trainer.train()

File ~/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/transformers/trainer.py:2052, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
2050 hf_hub_utils.enable_progress_bars()
2051 else:
-> 2052 return inner_training_loop(
2053 args=args,
2054 resume_from_checkpoint=resume_from_checkpoint,
2055 trial=trial,
2056 ignore_keys_for_eval=ignore_keys_for_eval,
2057 )

File ~/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/transformers/trainer.py:2388, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
2385 self.control = self.callback_handler.on_step_begin(args, self.state, self.control)
2387 with self.accelerator.accumulate(model):
-> 2388 tr_loss_step = self.training_step(model, inputs)
2390 if (
2391 args.logging_nan_inf_filter
2392 and not is_torch_xla_available()
2393 and (torch.isnan(tr_loss_step) or torch.isinf(tr_loss_step))
2394 ):
2395 # if loss is nan or inf simply add the average of previous logged losses
2396 tr_loss = tr_loss + tr_loss / (1 + self.state.global_step - self._globalstep_last_logged)

File ~/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/transformers/trainer.py:3485, in Trainer.training_step(self, model, inputs)
3482 return loss_mb.reduce_mean().detach().to(self.args.device)
3484 with self.compute_loss_context_manager():
-> 3485 loss = self.compute_loss(model, inputs)
3487 del inputs
3488 if (
3489 self.args.torch_empty_cache_steps is not None
3490 and self.state.global_step % self.args.torch_empty_cache_steps == 0
3491 ):

File ~/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/transformers/trainer.py:3532, in Trainer.compute_loss(self, model, inputs, return_outputs)
3530 else:
3531 labels = None
-> 3532 outputs = model(**inputs)
3533 # Save past state if it exists
3534 # TODO: this needs to be fixed and made cleaner later.
3535 if self.args.past_index >= 0:

File ~/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/torch/nn/modules/module.py:1553, in Module._wrapped_call_impl(self, *args, **kwargs)
1551 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1552 else:
-> 1553 return self._call_impl(*args, **kwargs)

File ~/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/torch/nn/modules/module.py:1562, in Module._call_impl(self, *args, **kwargs)
1557 # If we don't have any hooks, we want to skip the rest of the logic in
1558 # this function, and just call forward.
1559 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1560 or _global_backward_pre_hooks or _global_backward_hooks
1561 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1562 return forward_call(*args, **kwargs)
1564 try:
1565 result = None

File ~/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/torch/nn/parallel/data_parallel.py:186, in DataParallel.forward(self, *inputs, **kwargs)
184 return self.module(*inputs[0], **module_kwargs[0])
185 replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
--> 186 outputs = self.parallel_apply(replicas, inputs, module_kwargs)
187 return self.gather(outputs, self.output_device)

File ~/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/torch/nn/parallel/data_parallel.py:201, in DataParallel.parallel_apply(self, replicas, inputs, kwargs)
200 def parallel_apply(self, replicas: Sequence[T], inputs: Sequence[Any], kwargs: Any) -> List[Any]:
--> 201 return parallel_apply(replicas, inputs, kwargs, self.device_ids[:len(replicas)])

File ~/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/torch/nn/parallel/parallel_apply.py:109, in parallel_apply(modules, inputs, kwargs_tup, devices)
107 output = results[i]
108 if isinstance(output, ExceptionWrapper):
--> 109 output.reraise()
110 outputs.append(output)
111 return outputs

File ~/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/torch/_utils.py:706, in ExceptionWrapper.reraise(self)
702 except TypeError:
703 # If the exception takes multiple arguments, don't try to
704 # instantiate since we don't know how to
705 raise RuntimeError(msg) from None
--> 706 raise exception

RuntimeError: Caught RuntimeError in replica 0 on device 0.
Original Traceback (most recent call last):
File "/home/xyz/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/torch/nn/parallel/parallel_apply.py", line 84, in _worker
output = module(*input, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/xyz/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/xyz/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/xyz/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/transformers/models/rt_detr/modeling_rt_detr.py", line 2659, in forward
outputs = self.model(
^^^^^^^^^^^
File "/home/xyz/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/xyz/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/torch/nn/modules/module.py", line 1562, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/xyz/anaconda3/envs/dvc_detr/lib/python3.12/site-packages/transformers/models/rt_detr/modeling_rt_detr.py", line 1892, in forward
reference_points_unact = torch.concat([denoising_bbox_unact, reference_points_unact], 1)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 16 but got size 8 for tensor number 1 in the list.
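
Note on the traceback: the failure occurs inside torch.nn.DataParallel (replica 0), which Trainer falls back to automatically when more than one GPU is visible and the script was not launched in distributed mode. A plausible reading of the size mismatch is that the denoising boxes (built from the labels) still carry the full batch of 16 samples, while the image-derived reference points on each replica only carry the 8 samples scattered to that GPU, so the concat at modeling_rt_detr.py line 1892 sees inconsistent batch sizes. A minimal sketch of a workaround, assuming that interpretation, is to make only one GPU visible before anything touches CUDA, so Trainer never wraps the model in DataParallel:

import os

# Assumption: the mismatch comes from DataParallel splitting the batch across
# two GPUs. Exposing a single GPU keeps Trainer on one device; this must be
# set before torch initialises CUDA in the session.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import torch

print(torch.cuda.device_count())  # should now report 1

For genuine multi-GPU training, launching the same script with torchrun or accelerate launch (one process per GPU, DistributedDataParallel) avoids DataParallel's scatter step altogether.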
