(deformable_detr) root@1d4f89a0883e:/data/tmp/MOTR# sh configs/r50_motr_train.sh
Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
| distributed init (rank 1): env://
| distributed init (rank 0): env://
git:
  sha: 8690da3, status: has uncommited changes, branch: main
Namespace(accurate_ratio=False, aux_loss=True, backbone='resnet50', batch_size=1, bbox_loss_coef=5, cache_mode=False, cj=False, clip_max_norm=0.1, cls_loss_coef=2, coco_panoptic_path=None, coco_path='/data/workspace/detectron2/datasets/coco/', crop=False, data_txt_path_train='./datasets/data_path/crowdhuman.train', data_txt_path_val='./datasets/data_path/crowdhuman.val', dataset_file='e2e_joint', dec_layers=6, dec_n_points=4, decoder_cross_self=False, device='cuda', dice_loss_coef=1, dilation=False, dim_feedforward=1024, dist_backend='nccl', dist_url='env://', distributed=True, dropout=0.0, enable_fpn=False, enc_layers=6, enc_n_points=4, epochs=200, eval=False, exp_name='submit', extra_track_attn=True, filter_ignore=False, focal_alpha=0.25, fp_ratio=0.3, frozen_weights=None, giou_loss_coef=2, gpu=0, gt_file_train=None, gt_file_val=None, hidden_dim=256, img_path='data/valid/JPEGImages/', input_video='figs/demo.mp4', loss_normalizer=False, lr=0.0002, lr_backbone=2e-05, lr_backbone_names=['backbone.0'], lr_drop=100, lr_drop_epochs=None, lr_linear_proj_mult=0.1, lr_linear_proj_names=['reference_points', 'sampling_offsets'], mask_loss_coef=1, masks=False, max_size=1333, memory_bank_len=4, memory_bank_score_thresh=0.0, memory_bank_type=None, memory_bank_with_self_attn=False, merger_dropout=0.0, meta_arch='motr', mix_match=False, mot_path='/data/tmp/MOTR/data/', nheads=8, num_anchors=1, num_feature_levels=4, num_queries=300, num_workers=2, output_dir='exps/e2e_motr_r50_joint', position_embedding='sine', position_embedding_scale=6.283185307179586, pretrained=None, query_interaction_layer='QIM', random_drop=0.1, rank=0, remove_difficult=False, resume='', sample_interval=10, sample_mode='random_interval', sampler_lengths=[2, 3, 4, 5], sampler_steps=[50, 90, 150], save_period=50, seed=42, set_cost_bbox=5, set_cost_class=2, set_cost_giou=2, sgd=False, sigmoid_attn=False, start_epoch=0, two_stage=False, update_query_pos=True, use_checkpoint=False, val_width=800, vis=False, weight_decay=0.0001, with_box_refine=True, world_size=2)
Training with Extra Self Attention in Every Decoder.
Training with Self-Cross Attention.
number of params: 43912992
register 1-th video: /data/tmp/MOTR/data/crowdhuman/labels_with_ids/train
sampler_steps=[50, 90, 150] lenghts=[2, 3, 4, 5]
register 1-th video: /data/tmp/MOTR/data/crowdhuman/labels_with_ids/val
sampler_steps=[50, 90, 150] lenghts=[2, 3, 4, 5]
Start training
set epoch: epoch 0 period_idx=0
set epoch: epoch 0 period_idx=0
Traceback (most recent call last):
  File "main.py", line 386, in <module>
    main(args)
  File "main.py", line 332, in main
    train_stats = train_func(
  File "/data/tmp/MOTR/engine.py", line 104, in train_one_epoch_mot
    for data_dict in metric_logger.log_every(data_loader, print_freq, header):
  File "/data/tmp/MOTR/util/misc.py", line 260, in log_every
    for obj in iterable:
  File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 517, in __next__
    data = self._next_data()
  File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1199, in _next_data
    return self._process_data(data)
  File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1225, in _process_data
    data.reraise()
  File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/_utils.py", line 429, in reraise
    raise self.exc_type(msg)
ValueError: Caught ValueError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 202, in _worker_loop
    data = fetcher.fetch(index)
  File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/data/tmp/MOTR/datasets/joint.py", line 175, in __getitem__
    images, targets = self.pre_continuous_frames(sample_start, sample_end, sample_interval)
  File "/data/tmp/MOTR/datasets/joint.py", line 168, in pre_continuous_frames
    img_i, targets_i = self._pre_single_frame(i)
  File "/data/tmp/MOTR/datasets/joint.py", line 121, in _pre_single_frame
    raise ValueError('invalid label path: {}'.format(label_path))
ValueError: invalid label path: /data/tmp/MOTR/data/crowdhuman/labels_with_ids/train/283554,ff900035056259.txt
Traceback (most recent call last):
  File "main.py", line 386, in <module>
    main(args)
  File "main.py", line 332, in main
    train_stats = train_func(
  File "/data/tmp/MOTR/engine.py", line 104, in train_one_epoch_mot
    for data_dict in metric_logger.log_every(data_loader, print_freq, header):
  File "/data/tmp/MOTR/util/misc.py", line 260, in log_every
    for obj in iterable:
  File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 517, in __next__
    data = self._next_data()
  File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1199, in _next_data
    return self._process_data(data)
  File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1225, in _process_data
    data.reraise()
  File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/_utils.py", line 429, in reraise
    raise self.exc_type(msg)
ValueError: Caught ValueError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 202, in _worker_loop
    data = fetcher.fetch(index)
  File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/data/tmp/MOTR/datasets/joint.py", line 175, in __getitem__
    images, targets = self.pre_continuous_frames(sample_start, sample_end, sample_interval)
  File "/data/tmp/MOTR/datasets/joint.py", line 168, in pre_continuous_frames
    img_i, targets_i = self._pre_single_frame(i)
  File "/data/tmp/MOTR/datasets/joint.py", line 121, in _pre_single_frame
    raise ValueError('invalid label path: {}'.format(label_path))
ValueError: invalid label path: /data/tmp/MOTR/data/crowdhuman/labels_with_ids/train/273275,d79e3000d97c2336.txt
Killing subprocess 63352
Killing subprocess 63353
Traceback (most recent call last):
  File "/opt/conda/envs/deformable_detr/lib/python3.8/runpy.py", line 194, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/opt/conda/envs/deformable_detr/lib/python3.8/runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/distributed/launch.py", line 340, in <module>
    main()
  File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/distributed/launch.py", line 326, in main
    sigkill_handler(signal.SIGTERM, None) # not coming back
  File "/opt/conda/envs/deformable_detr/lib/python3.8/site-packages/torch/distributed/launch.py", line 301, in sigkill_handler
    raise subprocess.CalledProcessError(returncode=last_return_code, cmd=cmd)
subprocess.CalledProcessError: Command '['/opt/conda/envs/deformable_detr/bin/python3', '-u', 'main.py', '--meta_arch', 'motr', '--epoch', '200', '--with_box_refine', '--lr_drop', '100', '--lr', '2e-4', '--lr_backbone', '2e-5', '--output_dir', 'exps/e2e_motr_r50_joint', '--batch_size', '1', '--sample_mode', 'random_interval', '--sample_interval', '10', '--sampler_steps', '50', '90', '150', '--sampler_lengths', '2', '3', '4', '5', '--update_query_pos', '--merger_dropout', '0', '--dropout', '0', '--random_drop', '0.1', '--fp_ratio', '0.3', '--query_interaction_layer', 'QIM', '--extra_track_attn', '--data_txt_path_train', './datasets/data_path/crowdhuman.train', '--data_txt_path_val', './datasets/data_path/crowdhuman.val', '--mot_path', '/data/tmp/MOTR/data/', '--dataset_file', 'e2e_joint']' returned non-zero exit status 1.
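For reference, the ValueError above is raised in datasets/joint.py when the label file it expects for a sampled image does not exist on disk. A minimal sketch for listing every missing label file up front is below. It assumes (check against joint.py, this is an assumption) the usual JDE-style convention: each line of crowdhuman.train is an image path relative to --mot_path, and the label path is derived by swapping 'images' for 'labels_with_ids' and the image extension for '.txt'.

# Hypothetical check: report entries of crowdhuman.train whose expected
# label file under labels_with_ids is missing.  Path derivation follows
# the JDE-style convention described above and may need adjusting.
import os

MOT_PATH = '/data/tmp/MOTR/data/'                    # value passed as --mot_path
TRAIN_LIST = './datasets/data_path/crowdhuman.train' # value passed as --data_txt_path_train

missing = []
with open(TRAIN_LIST) as f:
    for line in f:
        img_rel = line.strip()
        if not img_rel:
            continue
        # assumed mapping: images/... .jpg|.png  ->  labels_with_ids/... .txt
        label_rel = (img_rel.replace('images', 'labels_with_ids')
                            .replace('.jpg', '.txt')
                            .replace('.png', '.txt'))
        label_path = os.path.join(MOT_PATH, label_rel)
        if not os.path.isfile(label_path):
            missing.append(label_path)

print(f'{len(missing)} label files missing')
for p in missing[:10]:
    print(p)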
tanglong-hub changed the title from "Errors when train the model" to "why there is no *txt? Errors when train the model" on Jul 19, 2022.
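On the title question: the *.txt files under labels_with_ids are not part of the raw CrowdHuman download; they have to be generated from annotation_train.odgt (preferably with the conversion script the MOTR/FairMOT documentation points to). The sketch below only illustrates the general idea, one JDE-format row per person box ('class id cx cy w h', normalized by image size, which matches the reshape(-1, 6) load in joint.py); the specific choices here (full box 'fbox', class 0, a globally incrementing id, the exact paths) are assumptions, not the repository's official converter.

# Rough sketch (not the official converter): build JDE-style label files
# for CrowdHuman train images from annotation_train.odgt.
import json
import os
from PIL import Image

IMG_DIR = '/data/tmp/MOTR/data/crowdhuman/images/train'            # assumed layout
LABEL_DIR = '/data/tmp/MOTR/data/crowdhuman/labels_with_ids/train' # assumed layout
ODGT = '/data/tmp/MOTR/data/crowdhuman/annotation_train.odgt'

os.makedirs(LABEL_DIR, exist_ok=True)
track_id = 0
with open(ODGT) as f:
    for line in f:
        ann = json.loads(line)                                # one JSON object per line
        img_path = os.path.join(IMG_DIR, ann['ID'] + '.jpg')
        if not os.path.isfile(img_path):
            continue
        width, height = Image.open(img_path).size
        rows = []
        for box in ann.get('gtboxes', []):
            if box.get('tag') != 'person':                    # skip ignore/mask boxes
                continue
            x, y, w, h = box['fbox']                          # full-body box in pixels (assumption)
            track_id += 1                                     # assumed: unique id per annotation
            cx, cy = (x + w / 2) / width, (y + h / 2) / height
            rows.append(f'0 {track_id} {cx:.6f} {cy:.6f} {w / width:.6f} {h / height:.6f}')
        with open(os.path.join(LABEL_DIR, ann['ID'] + '.txt'), 'w') as out:
            out.write('\n'.join(rows) + '\n')

Whatever generator is used, the label directory has to mirror the images directory (images/train alongside labels_with_ids/train) so that the path substitution in the dataset code resolves to an existing file.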