Initial commit

RandyWangRaining · Apr 7, 2024 · cd74dba · cd74dba
commit cd74dba
Show file tree

Hide file tree

Showing 176 changed files with 316,178 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,5 @@
+.idea
+**__pycache__**
+**.DS_Store**
+stereo/config
+tools/lazyconfig_train.py
diff --git a/README.md b/README.md
@@ -0,0 +1,44 @@
+<img src="./misc/OpenStereo.png" alt="logo" />
+
+------------------------------------------
+OpenStereo is a flexible and extensible project for stereo matching.
+
+## What's New
+- **[January 2024]**: Our proposed StereoBase ranks 1st on the [KITTI15 leaderboard](https://www.cvlibs.net/datasets/kitti/eval_scene_flow_detail.php?benchmark=stereo&result=52177cbd800e317dc999650fae31408deccebb4d)!!!
+- **[December 2023]**: Our paper is now public: [OpenStereo: A Comprehensive Benchmark for Stereo Matching and Strong Baseline](https://arxiv.org/abs/2312.00343)
+- **[March 2023]**: OpenStereo is available!!!
+
+## Highlighted features
+- **Multiple Datasets Supported**: OpenStereo supports six popular stereo datasets: [SceneFlow](datasets/SceneFlow/README.md), [KITTI12](datasets/KITTI12/README.md) & [KITTI15](datasets/KITTI15/README.md),
+ [ETH3D](datasets/ETH3D/README.md), [Middlebury](datasets/Middlebury/README.md) and [DrivingStereo](datasets/DrivingStereo/README.md).
+- **Multiple Models Supported**: We reproduced several SOTA methods and reached the same or even better performance.
+- **DDP Support**: The officially recommended [`Distributed Data Parallel (DDP)`](https://pytorch.org/tutorials/intermediate/ddp_tutorial.html) mode is used during both the training and testing phases.
+- **AMP Support**: The [`Auto Mixed Precision (AMP)`](https://pytorch.org/tutorials/recipes/recipes/amp_recipe.html?highlight=amp) option is available.
+- **Nice log**: We use [`tensorboard`](https://pytorch.org/docs/stable/tensorboard.html) and `logging` to log everything, which looks pretty.
+
+
+## Getting Started
+
+Please see [0.get_started.md](docs/0.get_started.md). We also provide the following tutorials for your reference:
+- [Prepare dataset](docs/2.prepare_dataset.md)
+- [Detailed configuration](docs/3.detailed_config.md)
+- [Customize model](docs/4.how_to_create_your_model.md)
+- [Advanced usages](docs/5.advanced_usages.md) 
+
+## Model Zoo
+Results and models are available in the [model zoo](docs/1.model_zoo.md).
+
+
+## Acknowledgement
+[AANet](https://github.com/haofeixu/aanet) &nbsp; [ACVNet](https://github.com/gangweiX/ACVNet) &nbsp; [CascadeStereo](https://github.com/alibaba/cascade-stereo) &nbsp; [CFNet](https://github.com/gallenszl/CFNet) &nbsp; [COEX](https://github.com/antabangun/coex) &nbsp; [DenseMatching](https://github.com/DeepMotionAIResearch/DenseMatchingBenchmark) &nbsp; [FADNet++](https://github.com/HKBU-HPML/FADNet/tree/fadnet-pp) &nbsp; [GwcNet](https://github.com/xy-guo/GwcNet) &nbsp; [MSNet](https://github.com/cogsys-tuebingen/mobilestereonet) &nbsp; [PSMNet](https://github.com/JiaRenChang/PSMNet) &nbsp; [RAFT](https://github.com/princeton-vl/RAFT-Stereo) &nbsp; [STTR](https://github.com/mli0603/stereo-transformer) &nbsp; [OpenGait](https://github.com/ShiqiYu/OpenGait) &nbsp; [IGEV](https://github.com/gangweiX/IGEV/tree/main/IGEV-Stereo)
+
+## Citation
+```
+@article{OpenStereo,
+        title={OpenStereo: A Comprehensive Benchmark for Stereo Matching and Strong Baseline},
+        author={Guo, Xianda and Lu, Juntao and Zhang, Chenming and Wang, Yiqi and Duan, Yiqun and Yang, Tian and Zhu, Zheng and Chen, Long},
+        journal={arXiv preprint arXiv:2312.00343},
+        year={2023}
+}
+```
+**Note**: This code is for academic use only; it may not be used for any purpose that could be considered commercial.
diff --git a/cfgs/aanet/aanet_sceneflow.yaml b/cfgs/aanet/aanet_sceneflow.yaml
@@ -0,0 +1,81 @@
+DATA_CONFIG:  # AANet on the SceneFlow cleanpass lists: dataset splits and per-phase transforms
+    DATA_INFOS:
+        -   DATASET: SceneFlowDataset
+            DATA_SPLIT: {
+                TRAINING: ./data/sceneflow/sceneflow_cleanpass_train.txt,
+                EVALUATING: ./data/sceneflow/sceneflow_cleanpass_test.txt,
+                TESTING: ./data/sceneflow/sceneflow_cleanpass_test.txt
+            }
+            DATA_PATH: /yourpath/SceneFlow  # placeholder; replace with the local SceneFlow root
+            RETURN_RIGHT_DISP: false
+
+    DATA_TRANSFORM:
+        TRAINING:
+            - { NAME: RandomCrop, SIZE: [ 288, 576 ], Y_JITTER: false }
+            - { NAME: TransposeImage }
+            - { NAME: ToTensor }
+            - { NAME: NormalizeImage, MEAN: [ 0.485, 0.456, 0.406 ], STD: [ 0.229, 0.224, 0.225 ] }
+        EVALUATING:
+            - { NAME: RightTopPad, SIZE: [ 576, 960 ] }
+            - { NAME: TransposeImage }
+            - { NAME: ToTensor }
+            - { NAME: NormalizeImage, MEAN: [ 0.485, 0.456, 0.406 ], STD: [ 0.229, 0.224, 0.225 ] }
+
+MODEL:
+    NAME: AANet
+    MAX_DISP: &max_disp 192  # YAML anchor; EVALUATOR.MAX_DISP aliases this value
+    NUM_DOWNSAMPLE: 2
+    FEATURE_TYPE: aanet
+    NO_FEATURE_MDCONV: false
+    FEATURE_PYRAMID: false
+    FEATURE_PYRAMID_NETWORK: true
+    FEATURE_SIMILARITY: correlation
+    AGGREGATION_TYPE: adaptive
+    NUM_SCALES: 3
+    NUM_FUSIONS: 6
+    DEFORMABLE_GROUPS: 2
+    MDCONV_DILATION: 2
+    REFINEMENT_TYPE: stereodrnet
+    NO_INTERMEDIATE_SUPERVISION: False
+    NUM_STAGE_BLOCKS: 1
+    NUM_DEFORM_BLOCKS: 3
+    FIND_UNUSED_PARAMETERS: false
+    CKPT: -1  # NOTE(review): presumably -1 means "do not resume from a checkpoint" - confirm against the trainer
+    PRETRAINED_MODEL: ''
+
+OPTIMIZATION:
+    BATCH_SIZE_PER_GPU: 1
+    FREEZE_BN: false
+    SYNC_BN: true
+    AMP: false
+    NUM_EPOCHS: 64
+
+    OPTIMIZER:
+        NAME: Lamb
+        LR: 0.004
+
+    SCHEDULER:
+        NAME: MultiStepLR
+        GAMMA: 0.5
+        MILESTONES: [ 20, 40, 60 ]
+        ON_EPOCH: True  # NOTE(review): presumably the scheduler steps per epoch, making MILESTONES epoch indices - confirm
+        WARMUP:
+            WARM_STEPS: 100
+
+EVALUATOR:
+    BATCH_SIZE_PER_GPU: 10
+    MAX_DISP: *max_disp  # alias of MODEL.MAX_DISP (192)
+    METRIC:
+        - d1_all
+        - epe
+        - thres_1
+        - thres_2
+        - thres_3
+
+TRAINER:
+    EVAL_INTERVAL: 1
+    CKPT_SAVE_INTERVAL: 1
+    MAX_CKPT_SAVE_NUM: 30
+    LOGGER_ITER_INTERVAL: 10
+    TRAIN_VISUALIZATION: True
+    EVAL_VISUALIZATION: True
diff --git a/cfgs/casnet/casnet_psm_sceneflow_amp.yaml b/cfgs/casnet/casnet_psm_sceneflow_amp.yaml
@@ -0,0 +1,77 @@
+DATA_CONFIG:  # CasPSMNet on the SceneFlow finalpass lists: dataset splits and per-phase transforms
+    DATA_INFOS:
+        -   DATASET: SceneFlowDataset
+            DATA_SPLIT: {
+                TRAINING: ./data/sceneflow/sceneflow_finalpass_train.txt,
+                EVALUATING: ./data/sceneflow/sceneflow_finalpass_test.txt,
+                TESTING: ./data/sceneflow/sceneflow_finalpass_test.txt
+            }
+            DATA_PATH: /yourpath/SceneFlow  # placeholder; replace with the local SceneFlow root
+            RETURN_RIGHT_DISP: false
+
+    DATA_TRANSFORM:
+        TRAINING:
+            - { NAME: RandomCrop, SIZE: [ 256, 512 ], Y_JITTER: false }
+            - { NAME: TransposeImage }
+            - { NAME: ToTensor }
+            - { NAME: NormalizeImage, MEAN: [ 0.485, 0.456, 0.406 ], STD: [ 0.229, 0.224, 0.225 ] }
+        EVALUATING:
+            - { NAME: RightBottomCrop, SIZE: [ 512, 960 ] }
+            - { NAME: TransposeImage }
+            - { NAME: ToTensor }
+            - { NAME: NormalizeImage, MEAN: [ 0.485, 0.456, 0.406 ], STD: [ 0.229, 0.224, 0.225 ] }
+
+MODEL:
+    NAME: CasPSMNet
+    MAX_DISP: &max_disp 192  # YAML anchor; EVALUATOR.MAX_DISP aliases this value
+    NDISPS: [ 48, 24 ]
+    DISP_INTERVAL_PIXEL: [ 4, 1 ]
+    USING_NS: true
+    NS_SIZE: 3
+    GRAD_METHOD: 'detach'
+    CR_BASE_CHS: [ 32, 32, 16 ]
+    FIND_UNUSED_PARAMETERS: false
+    CKPT: -1  # NOTE(review): presumably -1 means "do not resume from a checkpoint" - confirm against the trainer
+    PRETRAINED_MODEL: ''
+
+OPTIMIZATION:
+    BATCH_SIZE_PER_GPU: 12
+    FREEZE_BN: false
+    SYNC_BN: false
+    AMP: true  # mixed precision is on in this "_amp" config variant
+    NUM_EPOCHS: 20
+
+    OPTIMIZER:
+        NAME: RMSprop
+        LR: 0.001
+
+    SCHEDULER:
+        NAME: MultiStepLR
+        GAMMA: 0.5
+        MILESTONES: [ 10, 12, 14, 16 ]
+        ON_EPOCH: True  # NOTE(review): presumably the scheduler steps per epoch, making MILESTONES epoch indices - confirm
+        WARMUP:
+            WARM_STEPS: 100
+
+    CLIP_GRAD:
+        TYPE: norm  # norm-based clipping; configured by MAX_NORM and NORM_TYPE below
+        MAX_NORM: 35
+        NORM_TYPE: 2
+
+EVALUATOR:
+    BATCH_SIZE_PER_GPU: 10
+    MAX_DISP: *max_disp  # alias of MODEL.MAX_DISP (192)
+    METRIC:
+        - d1_all
+        - epe
+        - thres_1
+        - thres_2
+        - thres_3
+
+TRAINER:
+    EVAL_INTERVAL: 1
+    CKPT_SAVE_INTERVAL: 1
+    MAX_CKPT_SAVE_NUM: 30
+    LOGGER_ITER_INTERVAL: 10
+    TRAIN_VISUALIZATION: True
+    EVAL_VISUALIZATION: True
diff --git a/cfgs/cfnet/cfnet_sceneflow_part2.yaml b/cfgs/cfnet/cfnet_sceneflow_part2.yaml
@@ -0,0 +1,67 @@
+DATA_CONFIG:  # CFNet on the SceneFlow finalpass lists ("part2" config): dataset splits and per-phase transforms
+    DATA_INFOS:
+        -   DATASET: SceneFlowDataset
+            DATA_SPLIT: {
+                TRAINING: ./data/sceneflow/sceneflow_finalpass_train.txt,
+                EVALUATING: ./data/sceneflow/sceneflow_finalpass_test.txt,
+                TESTING: ./data/sceneflow/sceneflow_finalpass_test.txt
+            }
+            DATA_PATH: /yourpath/SceneFlow  # placeholder; replace with the local SceneFlow root
+            RETURN_RIGHT_DISP: false
+
+    DATA_TRANSFORM:
+        TRAINING:
+            - { NAME: RandomCrop, SIZE: [ 256, 512 ], Y_JITTER: false }
+            - { NAME: TransposeImage }
+            - { NAME: ToTensor }
+            - { NAME: NormalizeImage, MEAN: [ 0.485, 0.456, 0.406 ], STD: [ 0.229, 0.224, 0.225 ] }
+        EVALUATING:
+            - { NAME: RightBottomCrop, SIZE: [ 512, 960 ] }
+            - { NAME: TransposeImage }
+            - { NAME: ToTensor }
+            - { NAME: NormalizeImage, MEAN: [ 0.485, 0.456, 0.406 ], STD: [ 0.229, 0.224, 0.225 ] }
+
+MODEL:
+    NAME: CFNet
+    MAX_DISP: 256  # literal value (no anchor); EVALUATOR.MAX_DISP below is set separately to 192
+    REPLACE_MISH: false
+    FIND_UNUSED_PARAMETERS: false
+    CKPT: 20  # NOTE(review): the other configs use -1; presumably this "part2" run resumes from checkpoint 20 - confirm
+    PRETRAINED_MODEL: ''
+
+OPTIMIZATION:
+    BATCH_SIZE_PER_GPU: 4
+    FREEZE_BN: false
+    SYNC_BN: false
+    AMP: false
+    NUM_EPOCHS: 40
+
+    OPTIMIZER:
+        NAME: RMSprop
+        LR: 0.0001
+
+    SCHEDULER:
+        NAME: MultiStepLR
+        GAMMA: 0.1
+        MILESTONES: [ 10 ]
+        ON_EPOCH: True  # NOTE(review): presumably the scheduler steps per epoch; verify how MILESTONES interacts with resuming at CKPT 20
+        WARMUP:
+            WARM_STEPS: 100
+
+EVALUATOR:
+    BATCH_SIZE_PER_GPU: 1
+    MAX_DISP: 192  # NOTE(review): differs from MODEL.MAX_DISP (256) - confirm the mismatch is intentional
+    METRIC:
+        - d1_all
+        - epe
+        - thres_1
+        - thres_2
+        - thres_3
+
+TRAINER:
+    EVAL_INTERVAL: 1
+    CKPT_SAVE_INTERVAL: 1
+    MAX_CKPT_SAVE_NUM: 30
+    LOGGER_ITER_INTERVAL: 10
+    TRAIN_VISUALIZATION: True
+    EVAL_VISUALIZATION: True
diff --git a/cfgs/coex/coex_sceneflow_amp.yaml b/cfgs/coex/coex_sceneflow_amp.yaml
@@ -0,0 +1,79 @@
+DATA_CONFIG:  # CoExNet on the SceneFlow finalpass lists: dataset splits and per-phase transforms
+    DATA_INFOS:
+        -   DATASET: SceneFlowDataset
+            DATA_SPLIT: {
+                TRAINING: ./data/sceneflow/sceneflow_finalpass_train.txt,
+                EVALUATING: ./data/sceneflow/sceneflow_finalpass_test.txt,
+                TESTING: ./data/sceneflow/sceneflow_finalpass_test.txt
+            }
+            DATA_PATH: /yourpath/SceneFlow  # placeholder; replace with the local SceneFlow root
+            RETURN_RIGHT_DISP: false
+
+    DATA_TRANSFORM:
+        TRAINING:
+            - { NAME: RandomCrop, SIZE: [ 288, 576 ], Y_JITTER: false }
+            - { NAME: TransposeImage }
+            - { NAME: ToTensor }
+            - { NAME: NormalizeImage, MEAN: [ 0.485, 0.456, 0.406 ], STD: [ 0.229, 0.224, 0.225 ] }
+        EVALUATING:
+            - { NAME: RightTopPad, SIZE: [ 540, 960 ] }
+            - { NAME: TransposeImage }
+            - { NAME: ToTensor }
+            - { NAME: NormalizeImage, MEAN: [ 0.485, 0.456, 0.406 ], STD: [ 0.229, 0.224, 0.225 ] }
+
+MODEL:
+    NAME: CoExNet
+    MAX_DISP: &max_disp 192  # YAML anchor; EVALUATOR.MAX_DISP aliases this value
+    SPIXEL_BRANCH_CHANNELS: [ 32, 48 ]
+    CHANS: [ 16, 24, 32, 96, 160 ]
+    MATCHING_WEIGHTED: False
+    MATCHING_HEAD: 1
+    GCE: true
+    AGGREGATION_DISP_STRIDES: 2
+    AGGREGATION_CHANNELS: [ 16, 32, 48 ]
+    AGGREGATION_BLOCKS_NUM: [ 2, 2, 2 ]
+    REGRESSION_TOPK: 2
+    FIND_UNUSED_PARAMETERS: false
+    CKPT: -1  # NOTE(review): presumably -1 means "do not resume from a checkpoint" - confirm against the trainer
+    PRETRAINED_MODEL: ''
+
+OPTIMIZATION:
+    BATCH_SIZE_PER_GPU: 8
+    FREEZE_BN: false
+    SYNC_BN: false
+    AMP: true  # mixed precision is on in this "_amp" config variant
+    NUM_EPOCHS: 15
+
+    OPTIMIZER:
+        NAME: RMSprop
+        LR: 0.001
+
+    SCHEDULER:
+        NAME: MultiStepLR
+        GAMMA: 0.1
+        MILESTONES: [ 9, 12 ]
+        ON_EPOCH: True  # NOTE(review): presumably the scheduler steps per epoch, making MILESTONES epoch indices - confirm
+        WARMUP:
+            WARM_STEPS: 100
+
+    CLIP_GRAD:
+        TYPE: value  # value-based clipping; threshold given by CLIP_VALUE below
+        CLIP_VALUE: 0.1
+
+EVALUATOR:
+    BATCH_SIZE_PER_GPU: 10
+    MAX_DISP: *max_disp  # alias of MODEL.MAX_DISP (192)
+    METRIC:
+        - d1_all
+        - epe
+        - thres_1
+        - thres_2
+        - thres_3
+
+TRAINER:
+    EVAL_INTERVAL: 1
+    CKPT_SAVE_INTERVAL: 1
+    MAX_CKPT_SAVE_NUM: 30
+    LOGGER_ITER_INTERVAL: 10
+    TRAIN_VISUALIZATION: True
+    EVAL_VISUALIZATION: True
diff --git a/cfgs/data_basic.py b/cfgs/data_basic.py
@@ -0,0 +1,11 @@
+# @Time    : 2024/4/1 13:41
+# @Author  : zhangchenming
+DATA_PATH_DICT = {  # dataset name -> dataset root directory; NOTE(review): absolute, machine-specific paths - edit for the local environment
+    'SceneFlowDataset': '/mnt/nas/algorithm/chenming.zhang/dataset/SceneFlow',
+    'FlyingThings3DSubsetDataset': '/mnt/nas/algorithm/chenming.zhang/dataset/SceneFlow',  # same root as SceneFlowDataset
+    'KittiDataset12': '/mnt/nas/algorithm/chenming.zhang/dataset/KITTI/stereo_2012',
+    'KittiDataset15': '/mnt/nas/algorithm/chenming.zhang/dataset/KITTI/stereo_2015',
+    'DrivingDataset': '/mnt/nas/algorithm/chenming.zhang/dataset/DrivingStereo',
+    'MiddleburyDataset': '/mnt/nas/algorithm/chenming.zhang/dataset/Middlebury',
+    'ETH3DDataset': '/mnt/nas/algorithm/chenming.zhang/dataset/ETH3D'
+}
diff --git a/cfgs/driving_eval.yaml b/cfgs/driving_eval.yaml
@@ -0,0 +1,24 @@
+DATA_CONFIG:  # evaluation-only config for DrivingStereo: defines only an EVALUATING split and transform
+    DATA_INFOS:
+        -   DATASET: DrivingDataset
+            DATA_SPLIT: {
+                EVALUATING: ./data/DrivingStereo/driving_stereo_half_test.txt
+            }
+            DATA_PATH: /yourpath/DrivingStereo  # placeholder; replace with the local DrivingStereo root
+            RETURN_RIGHT_DISP: false
+
+    DATA_TRANSFORM:
+        EVALUATING:
+            - { NAME: CropOrPad, SIZE: [ 800, 1760 ] }
+            - { NAME: TransposeImage }
+            - { NAME: ToTensor }  # NOTE(review): no NormalizeImage step follows, unlike the SceneFlow configs - confirm this is intended
+
+EVALUATOR:
+    BATCH_SIZE_PER_GPU: 8
+    MAX_DISP: 192
+    METRIC:
+        - d1_all
+        - epe
+        - thres_1
+        - thres_2
+        - thres_3