From ee2e5423571d57d7f4156bade5a2cd6470fdffae Mon Sep 17 00:00:00 2001 From: Cycyes <92714336+Cycyes@users.noreply.github.com> Date: Tue, 21 Nov 2023 09:13:26 +0800 Subject: [PATCH] Add GroundingDINO on ODinW results, and support caption prompt of GroundingDINO (#11187) --- configs/odinw/README.md | 116 +-- .../grounding_dino_swin-b_pretrain_odinw13.py | 338 ++++++++ .../grounding_dino_swin-b_pretrain_odinw35.py | 796 ++++++++++++++++++ .../grounding_dino_swin-t_pretrain_odinw13.py | 338 ++++++++ .../grounding_dino_swin-t_pretrain_odinw35.py | 796 ++++++++++++++++++ mmdet/models/detectors/grounding_dino.py | 105 ++- 6 files changed, 2409 insertions(+), 80 deletions(-) create mode 100644 configs/odinw/grounding_dino_swin-b_pretrain_odinw13.py create mode 100644 configs/odinw/grounding_dino_swin-b_pretrain_odinw35.py create mode 100644 configs/odinw/grounding_dino_swin-t_pretrain_odinw13.py create mode 100644 configs/odinw/grounding_dino_swin-t_pretrain_odinw35.py diff --git a/configs/odinw/README.md b/configs/odinw/README.md index 108aac3685e..72d95933653 100644 --- a/configs/odinw/README.md +++ b/configs/odinw/README.md @@ -6,7 +6,7 @@ ## Get Started -1. development Developmennt Setup can reger to hits /\\To download dataset, you can refer to [reference document](../../docs/zh_cn/user_guides/dataset_prepare.md) +1. To download dataset, you can refer to [reference document](../../docs/zh_cn/user_guides/dataset_prepare.md) 2. You can use the following data to run the inference. 
@@ -22,73 +22,75 @@ Learning visual representations from natural language supervision has recently s ## Results and models of odinw13 -| Method | GLIP-T(A) | Official | GLIP-T(B) | Official | GLIP-T(C) | Official | -| --------------------- | --------- | --------- | --------- | --------- | --------- | --------- | -| AerialMaritimeDrone | 0.123 | 0.122 | 0.110 | 0.11 | 0.130 | 0.130 | -| Aquarium | 0.175 | 0.174 | 0.173 | 0.169 | 0.191 | 0.190 | -| CottontailRabbits | 0.686 | 0.686 | 0.688 | 0.688 | 0.744 | 0.744 | -| EgoHands | 0.013 | 0.013 | 0.003 | 0.540 | 0.314 | 0.315 | -| NorthAmericaMushrooms | 0.502 | 0.502 | 0.367 | 0.051 | 0.297 | 0.296 | -| Packages | 0.589 | 0.589 | 0.083 | 0.030 | 0.699 | 0.699 | -| PascalVOC | 0.512 | 0.512 | 0.541 | 0.288 | 0.565 | 0.565 | -| pistols | 0.339 | 0.339 | 0.502 | 0.338 | 0.503 | 0.504 | -| pothole | 0.007 | 0.007 | 0.030 | 0.475 | 0.058 | 0.058 | -| Raccoon | 0.075 | 0.075 | 0.285 | 0.288 | 0.241 | 0.244 | -| ShellfishOpenImages | 0.372 | 0.372 | 0.337 | 0.338 | 0.300 | 0.302 | -| thermalDogsAndPeople | 0.372 | 0.372 | 0.475 | 0.475 | 0.510 | 0.510 | -| VehiclesOpenImages | 0.574 | 0.574 | 0.562 | 0.547 | 0.549 | 0.534 | -| Average | **0.334** | **0.324** | **0.320** | **0.318** | **0.392** | **0.392** | +| Method | GLIP-T(A) | Official | GLIP-T(B) | Official | GLIP-T(C) | Official | GroundingDINO-T | GroundingDINO-B | +| --------------------- | --------- | --------- | --------- | --------- | --------- | --------- | --------------- | --------------- | +| AerialMaritimeDrone | 0.123 | 0.122 | 0.110 | 0.110 | 0.130 | 0.130 | 0.173 | 0.281 | +| Aquarium | 0.175 | 0.174 | 0.173 | 0.169 | 0.191 | 0.190 | 0.195 | 0.445 | +| CottontailRabbits | 0.686 | 0.686 | 0.688 | 0.688 | 0.744 | 0.744 | 0.799 | 0.808 | +| EgoHands | 0.013 | 0.013 | 0.003 | 0.004 | 0.314 | 0.315 | 0.608 | 0.764 | +| NorthAmericaMushrooms | 0.502 | 0.502 | 0.367 | 0.367 | 0.297 | 0.296 | 0.507 | 0.675 | +| Packages | 0.589 | 0.589 | 0.083 | 0.083 | 0.699 | 
0.699 | 0.687 | 0.670 | +| PascalVOC | 0.512 | 0.512 | 0.541 | 0.540 | 0.565 | 0.565 | 0.563 | 0.711 | +| pistols | 0.339 | 0.339 | 0.502 | 0.501 | 0.503 | 0.504 | 0.726 | 0.771 | +| pothole | 0.007 | 0.007 | 0.030 | 0.030 | 0.058 | 0.058 | 0.215 | 0.478 | +| Raccoon | 0.075 | 0.074 | 0.285 | 0.288 | 0.241 | 0.244 | 0.549 | 0.541 | +| ShellfishOpenImages | 0.253 | 0.253 | 0.337 | 0.338 | 0.300 | 0.302 | 0.393 | 0.650 | +| thermalDogsAndPeople | 0.372 | 0.372 | 0.475 | 0.475 | 0.510 | 0.510 | 0.657 | 0.633 | +| VehiclesOpenImages | 0.574 | 0.566 | 0.562 | 0.547 | 0.549 | 0.534 | 0.613 | 0.647 | +| Average | **0.325** | **0.324** | **0.320** | **0.318** | **0.392** | **0.392** | **0.514** | **0.621** | Note: 1. The above are zero-shot evaluation results. -2. The config and weights can be found at [here](../glip/README.md) +2. The config and weights of GLIP models can be found at [here](../glip/README.md) +3. The config and weights of GroundingDINO models can be found at [here](../grounding_dino/README.md) ## Results and models of odinw35 -| Method | GLIP-T(A) | Official | GLIP-T(B) | Official | GLIP-T(C) | Official | -| --------------------------- | --------- | --------- | --------- | --------- | --------- | --------- | -| AerialMaritimeDrone_large | 0.123 | 0.122 | 0.110 | 0.110 | 0.130 | 0.130 | -| AerialMaritimeDrone_tiled | 0.174 | 0.174 | 0.172 | 0.172 | 0.172 | 0.172 | -| AmericanSignLanguageLetters | 0.001 | 0.001 | 0.003 | 0.003 | 0.009 | 0.009 | -| Aquarium | 0.175 | 0.175 | 0.173 | 0.171 | 0.192 | 0.182 | -| BCCD | 0.016 | 0.016 | 0.001 | 0.001 | 0.000 | 0.000 | -| boggleBoards | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | -| brackishUnderwater | 0.016 | 0..013 | 0.021 | 0.027 | 0.020 | 0.022 | -| ChessPieces | 0.001 | 0.001 | 0.000 | 0.000 | 0.001 | 0.001 | -| CottontailRabbits | 0.710 | 0.709 | 0.683 | 0.683 | 0.752 | 0.752 | -| dice | 0.005 | 0.005 | 0.004 | 0.004 | 0.004 | 0.004 | -| DroneControl | 0.016 | 0.017 | 0.006 | 0.008 | 0.005 | 0.007 | -| 
EgoHands_generic | 0.009 | 0.010 | 0.005 | 0.006 | 0.510 | 0.508 | -| EgoHands_specific | 0.001 | 0.001 | 0.004 | 0.006 | 0.003 | 0.004 | -| HardHatWorkers | 0.029 | 0.029 | 0.023 | 0.023 | 0.033 | 0.033 | -| MaskWearing | 0.007 | 0.007 | 0.003 | 0.002 | 0.005 | 0.005 | -| MountainDewCommercial | 0.218 | 0.227 | 0.199 | 0.197 | 0.478 | 0.463 | -| NorthAmericaMushrooms | 0.502 | 0.502 | 0.450 | 0.450 | 0.497 | 0.497 | -| openPoetryVision | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | -| OxfordPets_by_breed | 0.001 | 0.002 | 0.002 | 0.004 | 0.001 | 0.002 | -| OxfordPets_by_species | 0.016 | 0.011 | 0.012 | 0.009 | 0.013 | 0.009 | -| PKLot | 0.002 | 0.002 | 0.000 | 0.000 | 0.000 | 0.000 | -| Packages | 0.569 | 0.569 | 0.279 | 0.279 | 0.712 | 0.712 | -| PascalVOC | 0.512 | 0.512 | 0.541 | 0.540 | 0.565 | 0.565 | -| pistols | 0.339 | 0.339 | 0.502 | 0.501 | 0.503 | 0.504 | -| plantdoc | 0.002 | 0.002 | 0.007 | 0.007 | 0.009 | 0.009 | -| pothole | 0.007 | 0.010 | 0.024 | 0.025 | 0.085 | 0.101 | -| Raccoons | 0.075 | 0.074 | 0.285 | 0.288 | 0.241 | 0.244 | -| selfdrivingCar | 0.071 | 0.072 | 0.074 | 0.074 | 0.081 | 0.080 | -| ShellfishOpenImages | 0.253 | 0.253 | 0.337 | 0.338 | 0.300 | 0.302 | -| ThermalCheetah | 0.028 | 0.028 | 0.000 | 0.000 | 0.028 | 0.028 | -| thermalDogsAndPeople | 0.372 | 0.372 | 0.475 | 0.475 | 0.510 | 0.510 | -| UnoCards | 0.000 | 0.000 | 0.000 | 0.001 | 0.002 | 0.003 | -| VehiclesOpenImages | 0.574 | 0.566 | 0.562 | 0.547 | 0.549 | 0.534 | -| WildfireSmoke | 0.000 | 0.000 | 0.000 | 0.000 | 0.017 | 0.017 | -| websiteScreenshots | 0.003 | 0.004 | 0.003 | 0.005 | 0.005 | 0.006 | -| Average | **0.134** | **0.134** | **0.138** | **0.138** | **0.179** | **0.178** | +| Method | GLIP-T(A) | Official | GLIP-T(B) | Official | GLIP-T(C) | Official | GroundingDINO-T | GroundingDINO-B | +| --------------------------- | --------- | --------- | --------- | --------- | --------- | --------- | --------------- | --------------- | +| AerialMaritimeDrone_large | 
0.123 | 0.122 | 0.110 | 0.110 | 0.130 | 0.130 | 0.173 | 0.281 | +| AerialMaritimeDrone_tiled | 0.174 | 0.174 | 0.172 | 0.172 | 0.172 | 0.172 | 0.206 | 0.364 | +| AmericanSignLanguageLetters | 0.001 | 0.001 | 0.003 | 0.003 | 0.009 | 0.009 | 0.002 | 0.096 | +| Aquarium | 0.175 | 0.175 | 0.173 | 0.171 | 0.192 | 0.182 | 0.195 | 0.445 | +| BCCD | 0.016 | 0.016 | 0.001 | 0.001 | 0.000 | 0.000 | 0.161 | 0.584 | +| boggleBoards | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.134 | +| brackishUnderwater | 0.016 | 0.013 | 0.021 | 0.027 | 0.020 | 0.022 | 0.021 | 0.454 | +| ChessPieces | 0.001 | 0.001 | 0.000 | 0.000 | 0.001 | 0.001 | 0.000 | 0.000 | +| CottontailRabbits | 0.710 | 0.709 | 0.683 | 0.683 | 0.752 | 0.752 | 0.806 | 0.797 | +| dice | 0.005 | 0.005 | 0.004 | 0.004 | 0.004 | 0.004 | 0.004 | 0.082 | +| DroneControl | 0.016 | 0.017 | 0.006 | 0.008 | 0.005 | 0.007 | 0.042 | 0.638 | +| EgoHands_generic | 0.009 | 0.010 | 0.005 | 0.006 | 0.510 | 0.508 | 0.608 | 0.764 | +| EgoHands_specific | 0.001 | 0.001 | 0.004 | 0.006 | 0.003 | 0.004 | 0.002 | 0.687 | +| HardHatWorkers | 0.029 | 0.029 | 0.023 | 0.023 | 0.033 | 0.033 | 0.046 | 0.439 | +| MaskWearing | 0.007 | 0.007 | 0.003 | 0.002 | 0.005 | 0.005 | 0.004 | 0.406 | +| MountainDewCommercial | 0.218 | 0.227 | 0.199 | 0.197 | 0.478 | 0.463 | 0.430 | 0.580 | +| NorthAmericaMushrooms | 0.502 | 0.502 | 0.450 | 0.450 | 0.497 | 0.497 | 0.471 | 0.501 | +| openPoetryVision | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.000 | 0.051 | +| OxfordPets_by_breed | 0.001 | 0.002 | 0.002 | 0.004 | 0.001 | 0.002 | 0.003 | 0.799 | +| OxfordPets_by_species | 0.016 | 0.011 | 0.012 | 0.009 | 0.013 | 0.009 | 0.011 | 0.872 | +| PKLot | 0.002 | 0.002 | 0.000 | 0.000 | 0.000 | 0.000 | 0.001 | 0.774 | +| Packages | 0.569 | 0.569 | 0.279 | 0.279 | 0.712 | 0.712 | 0.695 | 0.728 | +| PascalVOC | 0.512 | 0.512 | 0.541 | 0.540 | 0.565 | 0.565 | 0.563 | 0.711 | +| pistols | 0.339 | 0.339 | 0.502 | 0.501 | 0.503 | 0.504 | 0.726 | 0.771 | 
+| plantdoc | 0.002 | 0.002 | 0.007 | 0.007 | 0.009 | 0.009 | 0.005 | 0.376 | +| pothole | 0.007 | 0.010 | 0.024 | 0.025 | 0.085 | 0.101 | 0.215 | 0.478 | +| Raccoons | 0.075 | 0.074 | 0.285 | 0.288 | 0.241 | 0.244 | 0.549 | 0.541 | +| selfdrivingCar | 0.071 | 0.072 | 0.074 | 0.074 | 0.081 | 0.080 | 0.089 | 0.318 | +| ShellfishOpenImages | 0.253 | 0.253 | 0.337 | 0.338 | 0.300 | 0.302 | 0.393 | 0.650 | +| ThermalCheetah | 0.028 | 0.028 | 0.000 | 0.000 | 0.028 | 0.028 | 0.087 | 0.290 | +| thermalDogsAndPeople | 0.372 | 0.372 | 0.475 | 0.475 | 0.510 | 0.510 | 0.657 | 0.633 | +| UnoCards | 0.000 | 0.000 | 0.000 | 0.001 | 0.002 | 0.003 | 0.006 | 0.754 | +| VehiclesOpenImages | 0.574 | 0.566 | 0.562 | 0.547 | 0.549 | 0.534 | 0.613 | 0.647 | +| WildfireSmoke | 0.000 | 0.000 | 0.000 | 0.000 | 0.017 | 0.017 | 0.134 | 0.410 | +| websiteScreenshots | 0.003 | 0.004 | 0.003 | 0.005 | 0.005 | 0.006 | 0.012 | 0.175 | +| Average | **0.134** | **0.134** | **0.138** | **0.138** | **0.179** | **0.178** | **0.227** | **0.492** | Note: 1. The above are zero-shot evaluation results. -2. The config and weights can be found at [here](../glip/README.md) +2. The config and weights of GLIP models can be found at [here](../glip/README.md) +3. 
The config and weights of GroundingDINO models can be found at [here](../grounding_dino/README.md) ## Citation diff --git a/configs/odinw/grounding_dino_swin-b_pretrain_odinw13.py b/configs/odinw/grounding_dino_swin-b_pretrain_odinw13.py new file mode 100644 index 00000000000..b853d23fafe --- /dev/null +++ b/configs/odinw/grounding_dino_swin-b_pretrain_odinw13.py @@ -0,0 +1,338 @@ +_base_ = '../grounding_dino/grounding_dino_swin-b_pretrain_mixeddata.py' + +dataset_type = 'CocoDataset' +data_root = 'data/odinw/' + +base_test_pipeline = _base_.test_pipeline +base_test_pipeline[-1]['meta_keys'] = ('img_id', 'img_path', 'ori_shape', + 'img_shape', 'scale_factor', 'text', + 'custom_entities', 'caption_prompt') + +# ---------------------1 AerialMaritimeDrone---------------------# +class_name = ('boat', 'car', 'dock', 'jetski', 'lift') +metainfo = dict(classes=class_name) +_data_root = data_root + 'AerialMaritimeDrone/large/' +dataset_AerialMaritimeDrone = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + test_mode=True, + pipeline=base_test_pipeline, + return_classes=True) +val_evaluator_AerialMaritimeDrone = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------2 Aquarium---------------------# +class_name = ('fish', 'jellyfish', 'penguin', 'puffin', 'shark', 'starfish', + 'stingray') +metainfo = dict(classes=class_name) +_data_root = data_root + 'Aquarium/Aquarium Combined.v2-raw-1024.coco/' + +caption_prompt = None +# caption_prompt = { +# 'penguin': { +# 'suffix': ', which is black and white' +# }, +# 'puffin': { +# 'suffix': ' with orange beaks' +# }, +# 'stingray': { +# 'suffix': ' which is flat and round' +# }, +# } +dataset_Aquarium = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + 
data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + caption_prompt=caption_prompt, + test_mode=True, + return_classes=True) +val_evaluator_Aquarium = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------3 CottontailRabbits---------------------# +class_name = ('Cottontail-Rabbit', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'CottontailRabbits/' + +caption_prompt = None +# caption_prompt = {'Cottontail-Rabbit': {'name': 'rabbit'}} + +dataset_CottontailRabbits = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + caption_prompt=caption_prompt, + test_mode=True, + return_classes=True) +val_evaluator_CottontailRabbits = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------4 EgoHands---------------------# +class_name = ('hand', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'EgoHands/generic/' + +caption_prompt = None +# caption_prompt = {'hand': {'suffix': ' of a person'}} + +dataset_EgoHands = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + caption_prompt=caption_prompt, + test_mode=True, + return_classes=True) +val_evaluator_EgoHands = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------5 NorthAmericaMushrooms---------------------# +class_name = ('CoW', 'chanterelle') +metainfo = dict(classes=class_name) +_data_root = data_root + 'NorthAmericaMushrooms/North American Mushrooms.v1-416x416.coco/' # noqa + +caption_prompt = None +# caption_prompt = { +# 'CoW': { +# 'name': 'flat 
mushroom' +# }, +# 'chanterelle': { +# 'name': 'yellow mushroom' +# } +# } + +dataset_NorthAmericaMushrooms = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + caption_prompt=caption_prompt, + test_mode=True, + return_classes=True) +val_evaluator_NorthAmericaMushrooms = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------6 Packages---------------------# +class_name = ('package', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'Packages/Raw/' + +caption_prompt = None +# caption_prompt = { +# 'package': { +# 'prefix': 'there is a ', +# 'suffix': ' on the porch' +# } +# } + +dataset_Packages = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + caption_prompt=caption_prompt, + test_mode=True, + return_classes=True) +val_evaluator_Packages = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------7 PascalVOC---------------------# +class_name = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', + 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', + 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', + 'tvmonitor') +metainfo = dict(classes=class_name) +_data_root = data_root + 'PascalVOC/' +dataset_PascalVOC = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_PascalVOC = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') 
+ +# ---------------------8 pistols---------------------# +class_name = ('pistol', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'pistols/export/' +dataset_pistols = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='val_annotations_without_background.json', + data_prefix=dict(img=''), + pipeline=base_test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_pistols = dict( + type='CocoMetric', + ann_file=_data_root + 'val_annotations_without_background.json', + metric='bbox') + +# ---------------------9 pothole---------------------# +class_name = ('pothole', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'pothole/' + +caption_prompt = None +# caption_prompt = { +# 'pothole': { +# 'prefix': 'there are some ', +# 'name': 'holes', +# 'suffix': ' on the road' +# } +# } + +dataset_pothole = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_pothole = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------10 Raccoon---------------------# +class_name = ('raccoon', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'Raccoon/Raccoon.v2-raw.coco/' +dataset_Raccoon = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_Raccoon = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------11 ShellfishOpenImages---------------------# +class_name = ('Crab', 'Lobster', 'Shrimp') +metainfo = dict(classes=class_name) +_data_root = 
data_root + 'ShellfishOpenImages/raw/' +dataset_ShellfishOpenImages = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_ShellfishOpenImages = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------12 thermalDogsAndPeople---------------------# +class_name = ('dog', 'person') +metainfo = dict(classes=class_name) +_data_root = data_root + 'thermalDogsAndPeople/' +dataset_thermalDogsAndPeople = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_thermalDogsAndPeople = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------13 VehiclesOpenImages---------------------# +class_name = ('Ambulance', 'Bus', 'Car', 'Motorcycle', 'Truck') +metainfo = dict(classes=class_name) +_data_root = data_root + 'VehiclesOpenImages/416x416/' +dataset_VehiclesOpenImages = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_VehiclesOpenImages = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# --------------------- Config---------------------# +dataset_prefixes = [ + 'AerialMaritimeDrone', 'Aquarium', 'CottontailRabbits', 'EgoHands', + 'NorthAmericaMushrooms', 'Packages', 'PascalVOC', 'pistols', 'pothole', + 'Raccoon', 'ShellfishOpenImages', 'thermalDogsAndPeople', + 
'VehiclesOpenImages' +] +datasets = [ + dataset_AerialMaritimeDrone, dataset_Aquarium, dataset_CottontailRabbits, + dataset_EgoHands, dataset_NorthAmericaMushrooms, dataset_Packages, + dataset_PascalVOC, dataset_pistols, dataset_pothole, dataset_Raccoon, + dataset_ShellfishOpenImages, dataset_thermalDogsAndPeople, + dataset_VehiclesOpenImages +] +metrics = [ + val_evaluator_AerialMaritimeDrone, val_evaluator_Aquarium, + val_evaluator_CottontailRabbits, val_evaluator_EgoHands, + val_evaluator_NorthAmericaMushrooms, val_evaluator_Packages, + val_evaluator_PascalVOC, val_evaluator_pistols, val_evaluator_pothole, + val_evaluator_Raccoon, val_evaluator_ShellfishOpenImages, + val_evaluator_thermalDogsAndPeople, val_evaluator_VehiclesOpenImages +] + +# -------------------------------------------------# +val_dataloader = dict( + dataset=dict(_delete_=True, type='ConcatDataset', datasets=datasets)) +test_dataloader = val_dataloader + +val_evaluator = dict( + _delete_=True, + type='MultiDatasetsEvaluator', + metrics=metrics, + dataset_prefixes=dataset_prefixes) +test_evaluator = val_evaluator diff --git a/configs/odinw/grounding_dino_swin-b_pretrain_odinw35.py b/configs/odinw/grounding_dino_swin-b_pretrain_odinw35.py new file mode 100644 index 00000000000..a4b546b5998 --- /dev/null +++ b/configs/odinw/grounding_dino_swin-b_pretrain_odinw35.py @@ -0,0 +1,796 @@ +_base_ = '../grounding_dino/grounding_dino_swin-b_pretrain_mixeddata.py' + +dataset_type = 'CocoDataset' +data_root = 'data/odinw/' + +base_test_pipeline = _base_.test_pipeline +base_test_pipeline[-1]['meta_keys'] = ('img_id', 'img_path', 'ori_shape', + 'img_shape', 'scale_factor', 'text', + 'custom_entities', 'caption_prompt') + +# ---------------------1 AerialMaritimeDrone_large---------------------# +class_name = ('boat', 'car', 'dock', 'jetski', 'lift') +metainfo = dict(classes=class_name) +_data_root = data_root + 'AerialMaritimeDrone/large/' +dataset_AerialMaritimeDrone_large = dict( + type=dataset_type, + 
metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_AerialMaritimeDrone_large = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------2 AerialMaritimeDrone_tiled---------------------# +class_name = ('boat', 'car', 'dock', 'jetski', 'lift') +metainfo = dict(classes=class_name) +_data_root = data_root + 'AerialMaritimeDrone/tiled/' +dataset_AerialMaritimeDrone_tiled = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_AerialMaritimeDrone_tiled = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------3 AmericanSignLanguageLetters---------------------# +class_name = ('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z') +metainfo = dict(classes=class_name) +_data_root = data_root + 'AmericanSignLanguageLetters/American Sign Language Letters.v1-v1.coco/' # noqa +dataset_AmericanSignLanguageLetters = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_AmericanSignLanguageLetters = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------4 Aquarium---------------------# +class_name = ('fish', 'jellyfish', 'penguin', 'puffin', 'shark', 'starfish', + 'stingray') +metainfo = 
dict(classes=class_name) +_data_root = data_root + 'Aquarium/Aquarium Combined.v2-raw-1024.coco/' +dataset_Aquarium = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_Aquarium = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------5 BCCD---------------------# +class_name = ('Platelets', 'RBC', 'WBC') +metainfo = dict(classes=class_name) +_data_root = data_root + 'BCCD/BCCD.v3-raw.coco/' +dataset_BCCD = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_BCCD = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------6 boggleBoards---------------------# +class_name = ('Q', 'a', 'an', 'b', 'c', 'd', 'e', 'er', 'f', 'g', 'h', 'he', + 'i', 'in', 'j', 'k', 'l', 'm', 'n', 'o', 'o ', 'p', 'q', 'qu', + 'r', 's', 't', 't\\', 'th', 'u', 'v', 'w', 'wild', 'x', 'y', 'z') +metainfo = dict(classes=class_name) +_data_root = data_root + 'boggleBoards/416x416AutoOrient/export/' +dataset_boggleBoards = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='val_annotations_without_background.json', + data_prefix=dict(img=''), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_boggleBoards = dict( + type='CocoMetric', + ann_file=_data_root + 'val_annotations_without_background.json', + metric='bbox') + +# ---------------------7 brackishUnderwater---------------------# +class_name = ('crab', 'fish', 'jellyfish', 'shrimp', 'small_fish', 'starfish') +metainfo = 
dict(classes=class_name) +_data_root = data_root + 'brackishUnderwater/960x540/' +dataset_brackishUnderwater = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_brackishUnderwater = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------8 ChessPieces---------------------# +class_name = (' ', 'black bishop', 'black king', 'black knight', 'black pawn', + 'black queen', 'black rook', 'white bishop', 'white king', + 'white knight', 'white pawn', 'white queen', 'white rook') +metainfo = dict(classes=class_name) +_data_root = data_root + 'ChessPieces/Chess Pieces.v23-raw.coco/' +dataset_ChessPieces = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/new_annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_ChessPieces = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/new_annotations_without_background.json', + metric='bbox') + +# ---------------------9 CottontailRabbits---------------------# +class_name = ('rabbit', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'CottontailRabbits/' +dataset_CottontailRabbits = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/new_annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_CottontailRabbits = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/new_annotations_without_background.json', + metric='bbox') + +# ---------------------10 dice---------------------# +class_name = ('1', '2', '3', '4', '5', '6') +metainfo = 
dict(classes=class_name) +_data_root = data_root + 'dice/mediumColor/export/' +dataset_dice = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='val_annotations_without_background.json', + data_prefix=dict(img=''), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_dice = dict( + type='CocoMetric', + ann_file=_data_root + 'val_annotations_without_background.json', + metric='bbox') + +# ---------------------11 DroneControl---------------------# +class_name = ('follow', 'follow_hand', 'land', 'land_hand', 'null', 'object', + 'takeoff', 'takeoff-hand') +metainfo = dict(classes=class_name) +_data_root = data_root + 'DroneControl/Drone Control.v3-raw.coco/' +dataset_DroneControl = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_DroneControl = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------12 EgoHands_generic---------------------# +class_name = ('hand', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'EgoHands/generic/' +caption_prompt = {'hand': {'suffix': ' of a person'}} +dataset_EgoHands_generic = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + # NOTE w. prompt 0.548; wo. 
prompt 0.764 + # caption_prompt=caption_prompt, + test_mode=True, + return_classes=True) +val_evaluator_EgoHands_generic = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------13 EgoHands_specific---------------------# +class_name = ('myleft', 'myright', 'yourleft', 'yourright') +metainfo = dict(classes=class_name) +_data_root = data_root + 'EgoHands/specific/' +dataset_EgoHands_specific = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_EgoHands_specific = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------14 HardHatWorkers---------------------# +class_name = ('head', 'helmet', 'person') +metainfo = dict(classes=class_name) +_data_root = data_root + 'HardHatWorkers/raw/' +dataset_HardHatWorkers = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_HardHatWorkers = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------15 MaskWearing---------------------# +class_name = ('mask', 'no-mask') +metainfo = dict(classes=class_name) +_data_root = data_root + 'MaskWearing/raw/' +dataset_MaskWearing = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_MaskWearing = dict( + type='CocoMetric', + ann_file=_data_root + 
'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------16 MountainDewCommercial---------------------# +class_name = ('bottle', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'MountainDewCommercial/' +dataset_MountainDewCommercial = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_MountainDewCommercial = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------17 NorthAmericaMushrooms---------------------# +class_name = ('flat mushroom', 'yellow mushroom') +metainfo = dict(classes=class_name) +_data_root = data_root + 'NorthAmericaMushrooms/North American Mushrooms.v1-416x416.coco/' # noqa +dataset_NorthAmericaMushrooms = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/new_annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_NorthAmericaMushrooms = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/new_annotations_without_background.json', + metric='bbox') + +# ---------------------18 openPoetryVision---------------------# +class_name = ('American Typewriter', 'Andale Mono', 'Apple Chancery', 'Arial', + 'Avenir', 'Baskerville', 'Big Caslon', 'Bradley Hand', + 'Brush Script MT', 'Chalkboard', 'Comic Sans MS', 'Copperplate', + 'Courier', 'Didot', 'Futura', 'Geneva', 'Georgia', 'Gill Sans', + 'Helvetica', 'Herculanum', 'Impact', 'Kefa', 'Lucida Grande', + 'Luminari', 'Marker Felt', 'Menlo', 'Monaco', 'Noteworthy', + 'Optima', 'PT Sans', 'PT Serif', 'Palatino', 'Papyrus', + 'Phosphate', 'Rockwell', 'SF Pro', 'SignPainter', 'Skia', + 'Snell Roundhand', 'Tahoma', 'Times New Roman', 
'Trebuchet MS', + 'Verdana') +metainfo = dict(classes=class_name) +_data_root = data_root + 'openPoetryVision/512x512/' +dataset_openPoetryVision = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_openPoetryVision = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------19 OxfordPets_by_breed---------------------# +class_name = ('cat-Abyssinian', 'cat-Bengal', 'cat-Birman', 'cat-Bombay', + 'cat-British_Shorthair', 'cat-Egyptian_Mau', 'cat-Maine_Coon', + 'cat-Persian', 'cat-Ragdoll', 'cat-Russian_Blue', 'cat-Siamese', + 'cat-Sphynx', 'dog-american_bulldog', + 'dog-american_pit_bull_terrier', 'dog-basset_hound', + 'dog-beagle', 'dog-boxer', 'dog-chihuahua', + 'dog-english_cocker_spaniel', 'dog-english_setter', + 'dog-german_shorthaired', 'dog-great_pyrenees', 'dog-havanese', + 'dog-japanese_chin', 'dog-keeshond', 'dog-leonberger', + 'dog-miniature_pinscher', 'dog-newfoundland', 'dog-pomeranian', + 'dog-pug', 'dog-saint_bernard', 'dog-samoyed', + 'dog-scottish_terrier', 'dog-shiba_inu', + 'dog-staffordshire_bull_terrier', 'dog-wheaten_terrier', + 'dog-yorkshire_terrier') +metainfo = dict(classes=class_name) +_data_root = data_root + 'OxfordPets/by-breed/' # noqa +dataset_OxfordPets_by_breed = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_OxfordPets_by_breed = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------20 OxfordPets_by_species---------------------# +class_name = ('cat', 'dog') +metainfo = 
dict(classes=class_name) +_data_root = data_root + 'OxfordPets/by-species/' # noqa +dataset_OxfordPets_by_species = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_OxfordPets_by_species = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------21 PKLot---------------------# +class_name = ('space-empty', 'space-occupied') +metainfo = dict(classes=class_name) +_data_root = data_root + 'PKLot/640/' # noqa +dataset_PKLot = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_PKLot = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------22 Packages---------------------# +class_name = ('package', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'Packages/Raw/' +caption_prompt = { + 'package': { + 'prefix': 'there is a ', + 'suffix': ' on the porch' + } +} +dataset_Packages = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + caption_prompt=caption_prompt, # NOTE w. prompt 0.728; wo. 
prompt 0.670 + test_mode=True, + return_classes=True) +val_evaluator_Packages = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------23 PascalVOC---------------------# +class_name = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', + 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', + 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', + 'tvmonitor') +metainfo = dict(classes=class_name) +_data_root = data_root + 'PascalVOC/' +dataset_PascalVOC = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_PascalVOC = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------24 pistols---------------------# +class_name = ('pistol', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'pistols/export/' +dataset_pistols = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='val_annotations_without_background.json', + data_prefix=dict(img=''), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_pistols = dict( + type='CocoMetric', + ann_file=_data_root + 'val_annotations_without_background.json', + metric='bbox') + +# ---------------------25 plantdoc---------------------# +class_name = ('Apple Scab Leaf', 'Apple leaf', 'Apple rust leaf', + 'Bell_pepper leaf', 'Bell_pepper leaf spot', 'Blueberry leaf', + 'Cherry leaf', 'Corn Gray leaf spot', 'Corn leaf blight', + 'Corn rust leaf', 'Peach leaf', 'Potato leaf', + 'Potato leaf early blight', 'Potato leaf late blight', + 'Raspberry leaf', 'Soyabean leaf', 'Soybean leaf', + 'Squash Powdery mildew leaf', 'Strawberry leaf', + 'Tomato Early blight leaf', 'Tomato 
Septoria leaf spot', + 'Tomato leaf', 'Tomato leaf bacterial spot', + 'Tomato leaf late blight', 'Tomato leaf mosaic virus', + 'Tomato leaf yellow virus', 'Tomato mold leaf', + 'Tomato two spotted spider mites leaf', 'grape leaf', + 'grape leaf black rot') +metainfo = dict(classes=class_name) +_data_root = data_root + 'plantdoc/416x416/' +dataset_plantdoc = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_plantdoc = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------26 pothole---------------------# +class_name = ('pothole', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'pothole/' +caption_prompt = { + 'pothole': { + 'name': 'holes', + 'prefix': 'there are some ', + 'suffix': ' on the road' + } +} +dataset_pothole = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + # NOTE w. prompt 0.221; wo. 
prompt 0.478 + # caption_prompt=caption_prompt, + pipeline=base_test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_pothole = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------27 Raccoon---------------------# +class_name = ('raccoon', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'Raccoon/Raccoon.v2-raw.coco/' +dataset_Raccoon = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_Raccoon = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------28 selfdrivingCar---------------------# +class_name = ('biker', 'car', 'pedestrian', 'trafficLight', + 'trafficLight-Green', 'trafficLight-GreenLeft', + 'trafficLight-Red', 'trafficLight-RedLeft', + 'trafficLight-Yellow', 'trafficLight-YellowLeft', 'truck') +metainfo = dict(classes=class_name) +_data_root = data_root + 'selfdrivingCar/fixedLarge/export/' +dataset_selfdrivingCar = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='val_annotations_without_background.json', + data_prefix=dict(img=''), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_selfdrivingCar = dict( + type='CocoMetric', + ann_file=_data_root + 'val_annotations_without_background.json', + metric='bbox') + +# ---------------------29 ShellfishOpenImages---------------------# +class_name = ('Crab', 'Lobster', 'Shrimp') +metainfo = dict(classes=class_name) +_data_root = data_root + 'ShellfishOpenImages/raw/' +dataset_ShellfishOpenImages = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + 
data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_ShellfishOpenImages = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------30 ThermalCheetah---------------------# +class_name = ('cheetah', 'human') +metainfo = dict(classes=class_name) +_data_root = data_root + 'ThermalCheetah/' +dataset_ThermalCheetah = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_ThermalCheetah = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------31 thermalDogsAndPeople---------------------# +class_name = ('dog', 'person') +metainfo = dict(classes=class_name) +_data_root = data_root + 'thermalDogsAndPeople/' +dataset_thermalDogsAndPeople = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_thermalDogsAndPeople = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------32 UnoCards---------------------# +class_name = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', + '12', '13', '14') +metainfo = dict(classes=class_name) +_data_root = data_root + 'UnoCards/raw/' +dataset_UnoCards = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_UnoCards = dict( + type='CocoMetric', 
+ ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------33 VehiclesOpenImages---------------------# +class_name = ('Ambulance', 'Bus', 'Car', 'Motorcycle', 'Truck') +metainfo = dict(classes=class_name) +_data_root = data_root + 'VehiclesOpenImages/416x416/' +dataset_VehiclesOpenImages = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_VehiclesOpenImages = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------34 WildfireSmoke---------------------# +class_name = ('smoke', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'WildfireSmoke/' +dataset_WildfireSmoke = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_WildfireSmoke = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------35 websiteScreenshots---------------------# +class_name = ('button', 'field', 'heading', 'iframe', 'image', 'label', 'link', + 'text') +metainfo = dict(classes=class_name) +_data_root = data_root + 'websiteScreenshots/' +dataset_websiteScreenshots = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_websiteScreenshots = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# --------------------- 
Config---------------------# + +dataset_prefixes = [ + 'AerialMaritimeDrone_large', + 'AerialMaritimeDrone_tiled', + 'AmericanSignLanguageLetters', + 'Aquarium', + 'BCCD', + 'boggleBoards', + 'brackishUnderwater', + 'ChessPieces', + 'CottontailRabbits', + 'dice', + 'DroneControl', + 'EgoHands_generic', + 'EgoHands_specific', + 'HardHatWorkers', + 'MaskWearing', + 'MountainDewCommercial', + 'NorthAmericaMushrooms', + 'openPoetryVision', + 'OxfordPets_by_breed', + 'OxfordPets_by_species', + 'PKLot', + 'Packages', + 'PascalVOC', + 'pistols', + 'plantdoc', + 'pothole', + 'Raccoon', + 'selfdrivingCar', + 'ShellfishOpenImages', + 'ThermalCheetah', + 'thermalDogsAndPeople', + 'UnoCards', + 'VehiclesOpenImages', + 'WildfireSmoke', + 'websiteScreenshots', +] + +datasets = [ + dataset_AerialMaritimeDrone_large, dataset_AerialMaritimeDrone_tiled, + dataset_AmericanSignLanguageLetters, dataset_Aquarium, dataset_BCCD, + dataset_boggleBoards, dataset_brackishUnderwater, dataset_ChessPieces, + dataset_CottontailRabbits, dataset_dice, dataset_DroneControl, + dataset_EgoHands_generic, dataset_EgoHands_specific, + dataset_HardHatWorkers, dataset_MaskWearing, dataset_MountainDewCommercial, + dataset_NorthAmericaMushrooms, dataset_openPoetryVision, + dataset_OxfordPets_by_breed, dataset_OxfordPets_by_species, dataset_PKLot, + dataset_Packages, dataset_PascalVOC, dataset_pistols, dataset_plantdoc, + dataset_pothole, dataset_Raccoon, dataset_selfdrivingCar, + dataset_ShellfishOpenImages, dataset_ThermalCheetah, + dataset_thermalDogsAndPeople, dataset_UnoCards, dataset_VehiclesOpenImages, + dataset_WildfireSmoke, dataset_websiteScreenshots +] + +metrics = [ + val_evaluator_AerialMaritimeDrone_large, + val_evaluator_AerialMaritimeDrone_tiled, + val_evaluator_AmericanSignLanguageLetters, val_evaluator_Aquarium, + val_evaluator_BCCD, val_evaluator_boggleBoards, + val_evaluator_brackishUnderwater, val_evaluator_ChessPieces, + val_evaluator_CottontailRabbits, val_evaluator_dice, +
val_evaluator_DroneControl, val_evaluator_EgoHands_generic, + val_evaluator_EgoHands_specific, val_evaluator_HardHatWorkers, + val_evaluator_MaskWearing, val_evaluator_MountainDewCommercial, + val_evaluator_NorthAmericaMushrooms, val_evaluator_openPoetryVision, + val_evaluator_OxfordPets_by_breed, val_evaluator_OxfordPets_by_species, + val_evaluator_PKLot, val_evaluator_Packages, val_evaluator_PascalVOC, + val_evaluator_pistols, val_evaluator_plantdoc, val_evaluator_pothole, + val_evaluator_Raccoon, val_evaluator_selfdrivingCar, + val_evaluator_ShellfishOpenImages, val_evaluator_ThermalCheetah, + val_evaluator_thermalDogsAndPeople, val_evaluator_UnoCards, + val_evaluator_VehiclesOpenImages, val_evaluator_WildfireSmoke, + val_evaluator_websiteScreenshots +] + +# -------------------------------------------------# +val_dataloader = dict( + dataset=dict(_delete_=True, type='ConcatDataset', datasets=datasets)) +test_dataloader = val_dataloader + +val_evaluator = dict( + _delete_=True, + type='MultiDatasetsEvaluator', + metrics=metrics, + dataset_prefixes=dataset_prefixes) +test_evaluator = val_evaluator diff --git a/configs/odinw/grounding_dino_swin-t_pretrain_odinw13.py b/configs/odinw/grounding_dino_swin-t_pretrain_odinw13.py new file mode 100644 index 00000000000..6421ffc24ab --- /dev/null +++ b/configs/odinw/grounding_dino_swin-t_pretrain_odinw13.py @@ -0,0 +1,338 @@ +_base_ = '../grounding_dino/grounding_dino_swin-t_pretrain_obj365_goldg_cap4m.py' # noqa + +dataset_type = 'CocoDataset' +data_root = 'data/odinw/' + +base_test_pipeline = _base_.test_pipeline +base_test_pipeline[-1]['meta_keys'] = ('img_id', 'img_path', 'ori_shape', + 'img_shape', 'scale_factor', 'text', + 'custom_entities', 'caption_prompt') + +# ---------------------1 AerialMaritimeDrone---------------------# +class_name = ('boat', 'car', 'dock', 'jetski', 'lift') +metainfo = dict(classes=class_name) +_data_root = data_root + 'AerialMaritimeDrone/large/' +dataset_AerialMaritimeDrone = dict( + 
type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + test_mode=True, + pipeline=base_test_pipeline, + return_classes=True) +val_evaluator_AerialMaritimeDrone = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------2 Aquarium---------------------# +class_name = ('fish', 'jellyfish', 'penguin', 'puffin', 'shark', 'starfish', + 'stingray') +metainfo = dict(classes=class_name) +_data_root = data_root + 'Aquarium/Aquarium Combined.v2-raw-1024.coco/' + +caption_prompt = None +# caption_prompt = { +# 'penguin': { +# 'suffix': ', which is black and white' +# }, +# 'puffin': { +# 'suffix': ' with orange beaks' +# }, +# 'stingray': { +# 'suffix': ' which is flat and round' +# }, +# } +dataset_Aquarium = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + caption_prompt=caption_prompt, + test_mode=True, + return_classes=True) +val_evaluator_Aquarium = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------3 CottontailRabbits---------------------# +class_name = ('Cottontail-Rabbit', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'CottontailRabbits/' + +caption_prompt = None +# caption_prompt = {'Cottontail-Rabbit': {'name': 'rabbit'}} + +dataset_CottontailRabbits = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + caption_prompt=caption_prompt, + test_mode=True, + return_classes=True) +val_evaluator_CottontailRabbits = dict( + type='CocoMetric', + ann_file=_data_root + 
'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------4 EgoHands---------------------# +class_name = ('hand', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'EgoHands/generic/' + +caption_prompt = None +# caption_prompt = {'hand': {'suffix': ' of a person'}} + +dataset_EgoHands = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + caption_prompt=caption_prompt, + test_mode=True, + return_classes=True) +val_evaluator_EgoHands = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------5 NorthAmericaMushrooms---------------------# +class_name = ('CoW', 'chanterelle') +metainfo = dict(classes=class_name) +_data_root = data_root + 'NorthAmericaMushrooms/North American Mushrooms.v1-416x416.coco/' # noqa + +caption_prompt = None +# caption_prompt = { +# 'CoW': { +# 'name': 'flat mushroom' +# }, +# 'chanterelle': { +# 'name': 'yellow mushroom' +# } +# } + +dataset_NorthAmericaMushrooms = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + caption_prompt=caption_prompt, + test_mode=True, + return_classes=True) +val_evaluator_NorthAmericaMushrooms = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------6 Packages---------------------# +class_name = ('package', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'Packages/Raw/' + +caption_prompt = None +# caption_prompt = { +# 'package': { +# 'prefix': 'there is a ', +# 'suffix': ' on the porch' +# } +# } + +dataset_Packages = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + 
ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + caption_prompt=caption_prompt, + test_mode=True, + return_classes=True) +val_evaluator_Packages = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------7 PascalVOC---------------------# +class_name = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', + 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', + 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', + 'tvmonitor') +metainfo = dict(classes=class_name) +_data_root = data_root + 'PascalVOC/' +dataset_PascalVOC = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_PascalVOC = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------8 pistols---------------------# +class_name = ('pistol', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'pistols/export/' +dataset_pistols = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='val_annotations_without_background.json', + data_prefix=dict(img=''), + pipeline=base_test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_pistols = dict( + type='CocoMetric', + ann_file=_data_root + 'val_annotations_without_background.json', + metric='bbox') + +# ---------------------9 pothole---------------------# +class_name = ('pothole', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'pothole/' + +caption_prompt = None +# caption_prompt = { +# 'pothole': { +# 'prefix': 'there are some ', +# 'name': 'holes', +# 'suffix': ' on the road' +# } +# } + +dataset_pothole = dict( + type=dataset_type, + 
metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_pothole = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------10 Raccoon---------------------# +class_name = ('raccoon', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'Raccoon/Raccoon.v2-raw.coco/' +dataset_Raccoon = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_Raccoon = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------11 ShellfishOpenImages---------------------# +class_name = ('Crab', 'Lobster', 'Shrimp') +metainfo = dict(classes=class_name) +_data_root = data_root + 'ShellfishOpenImages/raw/' +dataset_ShellfishOpenImages = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_ShellfishOpenImages = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------12 thermalDogsAndPeople---------------------# +class_name = ('dog', 'person') +metainfo = dict(classes=class_name) +_data_root = data_root + 'thermalDogsAndPeople/' +dataset_thermalDogsAndPeople = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + test_mode=True, + return_classes=True) 
+val_evaluator_thermalDogsAndPeople = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------13 VehiclesOpenImages---------------------# +class_name = ('Ambulance', 'Bus', 'Car', 'Motorcycle', 'Truck') +metainfo = dict(classes=class_name) +_data_root = data_root + 'VehiclesOpenImages/416x416/' +dataset_VehiclesOpenImages = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_VehiclesOpenImages = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# --------------------- Config---------------------# +dataset_prefixes = [ + 'AerialMaritimeDrone', 'Aquarium', 'CottontailRabbits', 'EgoHands', + 'NorthAmericaMushrooms', 'Packages', 'PascalVOC', 'pistols', 'pothole', + 'Raccoon', 'ShellfishOpenImages', 'thermalDogsAndPeople', + 'VehiclesOpenImages' +] +datasets = [ + dataset_AerialMaritimeDrone, dataset_Aquarium, dataset_CottontailRabbits, + dataset_EgoHands, dataset_NorthAmericaMushrooms, dataset_Packages, + dataset_PascalVOC, dataset_pistols, dataset_pothole, dataset_Raccoon, + dataset_ShellfishOpenImages, dataset_thermalDogsAndPeople, + dataset_VehiclesOpenImages +] +metrics = [ + val_evaluator_AerialMaritimeDrone, val_evaluator_Aquarium, + val_evaluator_CottontailRabbits, val_evaluator_EgoHands, + val_evaluator_NorthAmericaMushrooms, val_evaluator_Packages, + val_evaluator_PascalVOC, val_evaluator_pistols, val_evaluator_pothole, + val_evaluator_Raccoon, val_evaluator_ShellfishOpenImages, + val_evaluator_thermalDogsAndPeople, val_evaluator_VehiclesOpenImages +] + +# -------------------------------------------------# +val_dataloader = dict( + dataset=dict(_delete_=True, type='ConcatDataset', datasets=datasets)) +test_dataloader = 
val_dataloader + +val_evaluator = dict( + _delete_=True, + type='MultiDatasetsEvaluator', + metrics=metrics, + dataset_prefixes=dataset_prefixes) +test_evaluator = val_evaluator diff --git a/configs/odinw/grounding_dino_swin-t_pretrain_odinw35.py b/configs/odinw/grounding_dino_swin-t_pretrain_odinw35.py new file mode 100644 index 00000000000..78a3d8626c0 --- /dev/null +++ b/configs/odinw/grounding_dino_swin-t_pretrain_odinw35.py @@ -0,0 +1,796 @@ +_base_ = '../grounding_dino/grounding_dino_swin-t_pretrain_obj365_goldg_cap4m.py' # noqa + +dataset_type = 'CocoDataset' +data_root = 'data/odinw/' + +base_test_pipeline = _base_.test_pipeline +base_test_pipeline[-1]['meta_keys'] = ('img_id', 'img_path', 'ori_shape', + 'img_shape', 'scale_factor', 'text', + 'custom_entities', 'caption_prompt') + +# ---------------------1 AerialMaritimeDrone_large---------------------# +class_name = ('boat', 'car', 'dock', 'jetski', 'lift') +metainfo = dict(classes=class_name) +_data_root = data_root + 'AerialMaritimeDrone/large/' +dataset_AerialMaritimeDrone_large = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_AerialMaritimeDrone_large = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------2 AerialMaritimeDrone_tiled---------------------# +class_name = ('boat', 'car', 'dock', 'jetski', 'lift') +metainfo = dict(classes=class_name) +_data_root = data_root + 'AerialMaritimeDrone/tiled/' +dataset_AerialMaritimeDrone_tiled = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_AerialMaritimeDrone_tiled = dict( + 
type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------3 AmericanSignLanguageLetters---------------------# +class_name = ('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z') +metainfo = dict(classes=class_name) +_data_root = data_root + 'AmericanSignLanguageLetters/American Sign Language Letters.v1-v1.coco/' # noqa +dataset_AmericanSignLanguageLetters = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_AmericanSignLanguageLetters = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------4 Aquarium---------------------# +class_name = ('fish', 'jellyfish', 'penguin', 'puffin', 'shark', 'starfish', + 'stingray') +metainfo = dict(classes=class_name) +_data_root = data_root + 'Aquarium/Aquarium Combined.v2-raw-1024.coco/' +dataset_Aquarium = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_Aquarium = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------5 BCCD---------------------# +class_name = ('Platelets', 'RBC', 'WBC') +metainfo = dict(classes=class_name) +_data_root = data_root + 'BCCD/BCCD.v3-raw.coco/' +dataset_BCCD = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) 
+val_evaluator_BCCD = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------6 boggleBoards---------------------# +class_name = ('Q', 'a', 'an', 'b', 'c', 'd', 'e', 'er', 'f', 'g', 'h', 'he', + 'i', 'in', 'j', 'k', 'l', 'm', 'n', 'o', 'o ', 'p', 'q', 'qu', + 'r', 's', 't', 't\\', 'th', 'u', 'v', 'w', 'wild', 'x', 'y', 'z') +metainfo = dict(classes=class_name) +_data_root = data_root + 'boggleBoards/416x416AutoOrient/export/' +dataset_boggleBoards = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='val_annotations_without_background.json', + data_prefix=dict(img=''), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_boggleBoards = dict( + type='CocoMetric', + ann_file=_data_root + 'val_annotations_without_background.json', + metric='bbox') + +# ---------------------7 brackishUnderwater---------------------# +class_name = ('crab', 'fish', 'jellyfish', 'shrimp', 'small_fish', 'starfish') +metainfo = dict(classes=class_name) +_data_root = data_root + 'brackishUnderwater/960x540/' +dataset_brackishUnderwater = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_brackishUnderwater = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------8 ChessPieces---------------------# +class_name = (' ', 'black bishop', 'black king', 'black knight', 'black pawn', + 'black queen', 'black rook', 'white bishop', 'white king', + 'white knight', 'white pawn', 'white queen', 'white rook') +metainfo = dict(classes=class_name) +_data_root = data_root + 'ChessPieces/Chess Pieces.v23-raw.coco/' +dataset_ChessPieces = dict( + type=dataset_type, + 
metainfo=metainfo, + data_root=_data_root, + ann_file='valid/new_annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_ChessPieces = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/new_annotations_without_background.json', + metric='bbox') + +# ---------------------9 CottontailRabbits---------------------# +class_name = ('rabbit', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'CottontailRabbits/' +dataset_CottontailRabbits = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/new_annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_CottontailRabbits = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/new_annotations_without_background.json', + metric='bbox') + +# ---------------------10 dice---------------------# +class_name = ('1', '2', '3', '4', '5', '6') +metainfo = dict(classes=class_name) +_data_root = data_root + 'dice/mediumColor/export/' +dataset_dice = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='val_annotations_without_background.json', + data_prefix=dict(img=''), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_dice = dict( + type='CocoMetric', + ann_file=_data_root + 'val_annotations_without_background.json', + metric='bbox') + +# ---------------------11 DroneControl---------------------# +class_name = ('follow', 'follow_hand', 'land', 'land_hand', 'null', 'object', + 'takeoff', 'takeoff-hand') +metainfo = dict(classes=class_name) +_data_root = data_root + 'DroneControl/Drone Control.v3-raw.coco/' +dataset_DroneControl = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + 
pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_DroneControl = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------12 EgoHands_generic---------------------# +class_name = ('hand', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'EgoHands/generic/' +caption_prompt = {'hand': {'suffix': ' of a person'}} +dataset_EgoHands_generic = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + # NOTE w. prompt 0.526, wo. prompt 0.608 + # caption_prompt=caption_prompt, + test_mode=True, + return_classes=True) +val_evaluator_EgoHands_generic = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------13 EgoHands_specific---------------------# +class_name = ('myleft', 'myright', 'yourleft', 'yourright') +metainfo = dict(classes=class_name) +_data_root = data_root + 'EgoHands/specific/' +dataset_EgoHands_specific = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_EgoHands_specific = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------14 HardHatWorkers---------------------# +class_name = ('head', 'helmet', 'person') +metainfo = dict(classes=class_name) +_data_root = data_root + 'HardHatWorkers/raw/' +dataset_HardHatWorkers = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + 
return_classes=True) +val_evaluator_HardHatWorkers = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------15 MaskWearing---------------------# +class_name = ('mask', 'no-mask') +metainfo = dict(classes=class_name) +_data_root = data_root + 'MaskWearing/raw/' +dataset_MaskWearing = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_MaskWearing = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------16 MountainDewCommercial---------------------# +class_name = ('bottle', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'MountainDewCommercial/' +dataset_MountainDewCommercial = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_MountainDewCommercial = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------17 NorthAmericaMushrooms---------------------# +class_name = ('flat mushroom', 'yellow mushroom') +metainfo = dict(classes=class_name) +_data_root = data_root + 'NorthAmericaMushrooms/North American Mushrooms.v1-416x416.coco/' # noqa +dataset_NorthAmericaMushrooms = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/new_annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_NorthAmericaMushrooms = dict( + type='CocoMetric', + ann_file=_data_root + 
'valid/new_annotations_without_background.json', + metric='bbox') + +# ---------------------18 openPoetryVision---------------------# +class_name = ('American Typewriter', 'Andale Mono', 'Apple Chancery', 'Arial', + 'Avenir', 'Baskerville', 'Big Caslon', 'Bradley Hand', + 'Brush Script MT', 'Chalkboard', 'Comic Sans MS', 'Copperplate', + 'Courier', 'Didot', 'Futura', 'Geneva', 'Georgia', 'Gill Sans', + 'Helvetica', 'Herculanum', 'Impact', 'Kefa', 'Lucida Grande', + 'Luminari', 'Marker Felt', 'Menlo', 'Monaco', 'Noteworthy', + 'Optima', 'PT Sans', 'PT Serif', 'Palatino', 'Papyrus', + 'Phosphate', 'Rockwell', 'SF Pro', 'SignPainter', 'Skia', + 'Snell Roundhand', 'Tahoma', 'Times New Roman', 'Trebuchet MS', + 'Verdana') +metainfo = dict(classes=class_name) +_data_root = data_root + 'openPoetryVision/512x512/' +dataset_openPoetryVision = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_openPoetryVision = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------19 OxfordPets_by_breed---------------------# +class_name = ('cat-Abyssinian', 'cat-Bengal', 'cat-Birman', 'cat-Bombay', + 'cat-British_Shorthair', 'cat-Egyptian_Mau', 'cat-Maine_Coon', + 'cat-Persian', 'cat-Ragdoll', 'cat-Russian_Blue', 'cat-Siamese', + 'cat-Sphynx', 'dog-american_bulldog', + 'dog-american_pit_bull_terrier', 'dog-basset_hound', + 'dog-beagle', 'dog-boxer', 'dog-chihuahua', + 'dog-english_cocker_spaniel', 'dog-english_setter', + 'dog-german_shorthaired', 'dog-great_pyrenees', 'dog-havanese', + 'dog-japanese_chin', 'dog-keeshond', 'dog-leonberger', + 'dog-miniature_pinscher', 'dog-newfoundland', 'dog-pomeranian', + 'dog-pug', 'dog-saint_bernard', 'dog-samoyed', + 'dog-scottish_terrier', 'dog-shiba_inu', + 
'dog-staffordshire_bull_terrier', 'dog-wheaten_terrier', + 'dog-yorkshire_terrier') +metainfo = dict(classes=class_name) +_data_root = data_root + 'OxfordPets/by-breed/' # noqa +dataset_OxfordPets_by_breed = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_OxfordPets_by_breed = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------20 OxfordPets_by_species---------------------# +class_name = ('cat', 'dog') +metainfo = dict(classes=class_name) +_data_root = data_root + 'OxfordPets/by-species/' # noqa +dataset_OxfordPets_by_species = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_OxfordPets_by_species = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------21 PKLot---------------------# +class_name = ('space-empty', 'space-occupied') +metainfo = dict(classes=class_name) +_data_root = data_root + 'PKLot/640/' # noqa +dataset_PKLot = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_PKLot = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------22 Packages---------------------# +class_name = ('package', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'Packages/Raw/' +caption_prompt = { + 'package': { + 
'prefix': 'there is a ', + 'suffix': ' on the porch' + } +} +dataset_Packages = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=base_test_pipeline, + caption_prompt=caption_prompt, # NOTE w. prompt 0.695; wo. prompt 0.687 + test_mode=True, + return_classes=True) +val_evaluator_Packages = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------23 PascalVOC---------------------# +class_name = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', + 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', + 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', + 'tvmonitor') +metainfo = dict(classes=class_name) +_data_root = data_root + 'PascalVOC/' +dataset_PascalVOC = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_PascalVOC = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------24 pistols---------------------# +class_name = ('pistol', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'pistols/export/' +dataset_pistols = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='val_annotations_without_background.json', + data_prefix=dict(img=''), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_pistols = dict( + type='CocoMetric', + ann_file=_data_root + 'val_annotations_without_background.json', + metric='bbox') + +# ---------------------25 plantdoc---------------------# +class_name = ('Apple Scab Leaf', 'Apple leaf', 'Apple rust leaf', + 'Bell_pepper leaf', 'Bell_pepper leaf 
spot', 'Blueberry leaf', + 'Cherry leaf', 'Corn Gray leaf spot', 'Corn leaf blight', + 'Corn rust leaf', 'Peach leaf', 'Potato leaf', + 'Potato leaf early blight', 'Potato leaf late blight', + 'Raspberry leaf', 'Soyabean leaf', 'Soybean leaf', + 'Squash Powdery mildew leaf', 'Strawberry leaf', + 'Tomato Early blight leaf', 'Tomato Septoria leaf spot', + 'Tomato leaf', 'Tomato leaf bacterial spot', + 'Tomato leaf late blight', 'Tomato leaf mosaic virus', + 'Tomato leaf yellow virus', 'Tomato mold leaf', + 'Tomato two spotted spider mites leaf', 'grape leaf', + 'grape leaf black rot') +metainfo = dict(classes=class_name) +_data_root = data_root + 'plantdoc/416x416/' +dataset_plantdoc = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_plantdoc = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------26 pothole---------------------# +class_name = ('pothole', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'pothole/' +caption_prompt = { + 'pothole': { + 'name': 'holes', + 'prefix': 'there are some', + 'suffix': ' on the road' + } +} +dataset_pothole = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + # NOTE w. prompt 0.137; wo. 
prompt 0.215 + # caption_prompt=caption_prompt, + pipeline=base_test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_pothole = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------27 Raccoon---------------------# +class_name = ('raccoon', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'Raccoon/Raccoon.v2-raw.coco/' +dataset_Raccoon = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_Raccoon = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------28 selfdrivingCar---------------------# +class_name = ('biker', 'car', 'pedestrian', 'trafficLight', + 'trafficLight-Green', 'trafficLight-GreenLeft', + 'trafficLight-Red', 'trafficLight-RedLeft', + 'trafficLight-Yellow', 'trafficLight-YellowLeft', 'truck') +metainfo = dict(classes=class_name) +_data_root = data_root + 'selfdrivingCar/fixedLarge/export/' +dataset_selfdrivingCar = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='val_annotations_without_background.json', + data_prefix=dict(img=''), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_selfdrivingCar = dict( + type='CocoMetric', + ann_file=_data_root + 'val_annotations_without_background.json', + metric='bbox') + +# ---------------------29 ShellfishOpenImages---------------------# +class_name = ('Crab', 'Lobster', 'Shrimp') +metainfo = dict(classes=class_name) +_data_root = data_root + 'ShellfishOpenImages/raw/' +dataset_ShellfishOpenImages = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + 
data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_ShellfishOpenImages = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------30 ThermalCheetah---------------------# +class_name = ('cheetah', 'human') +metainfo = dict(classes=class_name) +_data_root = data_root + 'ThermalCheetah/' +dataset_ThermalCheetah = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_ThermalCheetah = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------31 thermalDogsAndPeople---------------------# +class_name = ('dog', 'person') +metainfo = dict(classes=class_name) +_data_root = data_root + 'thermalDogsAndPeople/' +dataset_thermalDogsAndPeople = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_thermalDogsAndPeople = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------32 UnoCards---------------------# +class_name = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', + '12', '13', '14') +metainfo = dict(classes=class_name) +_data_root = data_root + 'UnoCards/raw/' +dataset_UnoCards = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_UnoCards = dict( + type='CocoMetric', 
+ ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------33 VehiclesOpenImages---------------------# +class_name = ('Ambulance', 'Bus', 'Car', 'Motorcycle', 'Truck') +metainfo = dict(classes=class_name) +_data_root = data_root + 'VehiclesOpenImages/416x416/' +dataset_VehiclesOpenImages = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_VehiclesOpenImages = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------34 WildfireSmoke---------------------# +class_name = ('smoke', ) +metainfo = dict(classes=class_name) +_data_root = data_root + 'WildfireSmoke/' +dataset_WildfireSmoke = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_WildfireSmoke = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# ---------------------35 websiteScreenshots---------------------# +class_name = ('button', 'field', 'heading', 'iframe', 'image', 'label', 'link', + 'text') +metainfo = dict(classes=class_name) +_data_root = data_root + 'websiteScreenshots/' +dataset_websiteScreenshots = dict( + type=dataset_type, + metainfo=metainfo, + data_root=_data_root, + ann_file='valid/annotations_without_background.json', + data_prefix=dict(img='valid/'), + pipeline=_base_.test_pipeline, + test_mode=True, + return_classes=True) +val_evaluator_websiteScreenshots = dict( + type='CocoMetric', + ann_file=_data_root + 'valid/annotations_without_background.json', + metric='bbox') + +# --------------------- 
Config---------------------# + +dataset_prefixes = [ + 'AerialMaritimeDrone_large', + 'AerialMaritimeDrone_tiled', + 'AmericanSignLanguageLetters', + 'Aquarium', + 'BCCD', + 'boggleBoards', + 'brackishUnderwater', + 'ChessPieces', + 'CottontailRabbits', + 'dice', + 'DroneControl', + 'EgoHands_generic', + 'EgoHands_specific', + 'HardHatWorkers', + 'MaskWearing', + 'MountainDewCommercial', + 'NorthAmericaMushrooms', + 'openPoetryVision', + 'OxfordPets_by_breed', + 'OxfordPets_by_species', + 'PKLot', + 'Packages', + 'PascalVOC', + 'pistols', + 'plantdoc', + 'pothole', + 'Raccoons', + 'selfdrivingCar', + 'ShellfishOpenImages', + 'ThermalCheetah', + 'thermalDogsAndPeople', + 'UnoCards', + 'VehiclesOpenImages', + 'WildfireSmoke', + 'websiteScreenshots', +] + +datasets = [ + dataset_AerialMaritimeDrone_large, dataset_AerialMaritimeDrone_tiled, + dataset_AmericanSignLanguageLetters, dataset_Aquarium, dataset_BCCD, + dataset_boggleBoards, dataset_brackishUnderwater, dataset_ChessPieces, + dataset_CottontailRabbits, dataset_dice, dataset_DroneControl, + dataset_EgoHands_generic, dataset_EgoHands_specific, + dataset_HardHatWorkers, dataset_MaskWearing, dataset_MountainDewCommercial, + dataset_NorthAmericaMushrooms, dataset_openPoetryVision, + dataset_OxfordPets_by_breed, dataset_OxfordPets_by_species, dataset_PKLot, + dataset_Packages, dataset_PascalVOC, dataset_pistols, dataset_plantdoc, + dataset_pothole, dataset_Raccoon, dataset_selfdrivingCar, + dataset_ShellfishOpenImages, dataset_ThermalCheetah, + dataset_thermalDogsAndPeople, dataset_UnoCards, dataset_VehiclesOpenImages, + dataset_WildfireSmoke, dataset_websiteScreenshots +] + +metrics = [ + val_evaluator_AerialMaritimeDrone_large, + val_evaluator_AerialMaritimeDrone_tiled, + val_evaluator_AmericanSignLanguageLetters, val_evaluator_Aquarium, + val_evaluator_BCCD, val_evaluator_boggleBoards, + val_evaluator_brackishUnderwater, val_evaluator_ChessPieces, + val_evaluator_CottontailRabbits, val_evaluator_dice, + 
val_evaluator_DroneControl, val_evaluator_EgoHands_generic, + val_evaluator_EgoHands_specific, val_evaluator_HardHatWorkers, + val_evaluator_MaskWearing, val_evaluator_MountainDewCommercial, + val_evaluator_NorthAmericaMushrooms, val_evaluator_openPoetryVision, + val_evaluator_OxfordPets_by_breed, val_evaluator_OxfordPets_by_species, + val_evaluator_PKLot, val_evaluator_Packages, val_evaluator_PascalVOC, + val_evaluator_pistols, val_evaluator_plantdoc, val_evaluator_pothole, + val_evaluator_Raccoon, val_evaluator_selfdrivingCar, + val_evaluator_ShellfishOpenImages, val_evaluator_ThermalCheetah, + val_evaluator_thermalDogsAndPeople, val_evaluator_UnoCards, + val_evaluator_VehiclesOpenImages, val_evaluator_WildfireSmoke, + val_evaluator_websiteScreenshots +] + +# -------------------------------------------------# +val_dataloader = dict( + dataset=dict(_delete_=True, type='ConcatDataset', datasets=datasets)) +test_dataloader = val_dataloader + +val_evaluator = dict( + _delete_=True, + type='MultiDatasetsEvaluator', + metrics=metrics, + dataset_prefixes=dataset_prefixes) +test_evaluator = val_evaluator diff --git a/mmdet/models/detectors/grounding_dino.py b/mmdet/models/detectors/grounding_dino.py index 24518e62edd..cc6cccedf29 100644 --- a/mmdet/models/detectors/grounding_dino.py +++ b/mmdet/models/detectors/grounding_dino.py @@ -1,6 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. 
import re


def clean_label_name(name: str) -> str:
    """Normalise a dataset class name before it is placed in a text prompt.

    Three clean-ups are applied in order:

    1. Parenthesised text is removed. The match is greedy, so everything
       from the first ``(`` to the last ``)`` is dropped.
    2. Underscores are replaced by spaces.
    3. Runs of two or more spaces are collapsed to a single space.

    Leading and trailing whitespace is left untouched.

    Args:
        name (str): Raw class name, e.g. ``'dog_(animal)_toy'``.

    Returns:
        str: The cleaned name, e.g. ``'dog toy'``.
    """
    name = re.sub(r'\(.*\)', '', name)
    name = name.replace('_', ' ')
    # Steps 1 and 2 can leave doubled spaces behind ('a_(b)_c' -> 'a  c').
    # Collapse any run of spaces, not just one pair, so the result is stable
    # however many separators were adjacent.
    name = re.sub(r' {2,}', ' ', name)
    return name


# NOTE: the two functions below are methods of ``GroundingDINO`` (they read
# ``self._special_tokens``); indent them into the class body when merging.


def to_enhance_text_prompts(self, original_caption, enhanced_text_prompts):
    """Build a caption in which selected entities get extra context words.

    Every entry of ``original_caption`` is written to the caption as
    ``prefix + name + suffix`` followed by ``self._special_tokens``.
    ``prefix`` and ``suffix`` default to the empty string and ``name``
    defaults to the entry itself. Entries without a prompt are written
    unchanged.

    Args:
        original_caption (list[str] | tuple[str]): Entity names, one per
            category.
        enhanced_text_prompts (dict | None): Maps an entity name to a dict
            with the optional keys ``'prefix'``, ``'name'`` and
            ``'suffix'``. A key matches either verbatim or after
            :func:`clean_label_name` has been applied to it. ``None`` or an
            empty mapping means that no entity is enhanced.

    Returns:
        tuple[str, list]: The caption string and ``tokens_positive``, which
        holds one ``[[start, end]]`` character span per entity. The span
        covers only the (possibly replaced) name, never the prefix or
        suffix.
    """
    prompts = dict(enhanced_text_prompts or {})
    # Callers clean the entity names before calling this method, so a prompt
    # registered under a raw class name such as 'Bell_pepper leaf' would
    # never be found. Register the cleaned spelling as well; an exact key
    # always takes precedence.
    for key, prompt in list(prompts.items()):
        if isinstance(key, str):
            prompts.setdefault(clean_label_name(key), prompt)

    pieces = []
    tokens_positive = []
    offset = 0  # length of the caption assembled so far
    for word in original_caption:
        prompt = prompts.get(word) or {}
        prefix = prompt.get('prefix', '')
        entity = prompt.get('name', word)
        suffix = prompt.get('suffix', '')

        start = offset + len(prefix)
        tokens_positive.append([[start, start + len(entity)]])

        piece = prefix + entity + suffix + self._special_tokens
        pieces.append(piece)
        offset += len(piece)
    return ''.join(pieces), tokens_positive


def to_plain_text_prompts(self, original_caption):
    """Join entity names into a caption separated by the special tokens.

    Args:
        original_caption (list[str] | tuple[str]): Entity names, one per
            category.

    Returns:
        tuple[str, list]: The caption string, in which every entity is
        followed by ``self._special_tokens``, and ``tokens_positive``, which
        holds one ``[[start, end]]`` character span per entity.
    """
    pieces = []
    tokens_positive = []
    offset = 0  # length of the caption assembled so far
    for word in original_caption:
        tokens_positive.append([[offset, offset + len(word)]])
        piece = word + self._special_tokens
        pieces.append(piece)
        offset += len(piece)
    return ''.join(pieces), tokens_positive
The tokenizer in GLIP will pad the # caption_string to max_length, while the tokenizer @@ -123,9 +172,11 @@ def get_positive_map(self, tokenized, tokens_positive): return positive_map_label_to_token, positive_map def get_tokens_positive_and_prompts( - self, - original_caption: Union[str, list, tuple], - custom_entities: bool = False) -> Tuple[dict, str, Tensor, list]: + self, + original_caption: Union[str, list, tuple], + custom_entities: bool = False, + enhanced_text_prompt: Optional[ConfigType] = None + ) -> Tuple[dict, str, Tensor, list]: """Get the tokens positive and prompts for the caption. Args: @@ -141,7 +192,7 @@ def get_tokens_positive_and_prompts( """ tokenized, caption_string, tokens_positive, entities = \ self.get_tokens_and_prompts( - original_caption, custom_entities) + original_caption, custom_entities, enhanced_text_prompt) positive_map_label_to_token, positive_map = self.get_positive_map( tokenized, tokens_positive) return positive_map_label_to_token, caption_string, \ @@ -326,9 +377,15 @@ def loss(self, batch_inputs: Tensor, return losses def predict(self, batch_inputs, batch_data_samples, rescale: bool = True): - text_prompts = [ - data_samples.text for data_samples in batch_data_samples - ] + text_prompts = [] + enhanced_text_prompts = [] + for data_samples in batch_data_samples: + text_prompts.append(data_samples.text) + if 'caption_prompt' in data_samples: + enhanced_text_prompts.append(data_samples.caption_prompt) + else: + enhanced_text_prompts.append(None) + if 'custom_entities' in batch_data_samples[0]: # Assuming that the `custom_entities` flag # inside a batch is always the same. For single image inference @@ -339,14 +396,16 @@ def predict(self, batch_inputs, batch_data_samples, rescale: bool = True): # All the text prompts are the same, # so there is no need to calculate them multiple times. 
_positive_maps_and_prompts = [ - self.get_tokens_positive_and_prompts(text_prompts[0], - custom_entities) + self.get_tokens_positive_and_prompts( + text_prompts[0], custom_entities, enhanced_text_prompts[0]) ] * len(batch_inputs) else: _positive_maps_and_prompts = [ self.get_tokens_positive_and_prompts(text_prompt, - custom_entities) - for text_prompt in text_prompts + custom_entities, + enhanced_text_prompt) + for text_prompt, enhanced_text_prompt in zip( + text_prompts, enhanced_text_prompts) ] token_positive_maps, text_prompts, _, entities = zip( *_positive_maps_and_prompts)